示例#1
0
 /// <summary>
 /// Creates the result of aligning several words at once and runs the alignment immediately.
 /// </summary>
 /// <param name="wordAligner">Aligner forwarded to the base result.</param>
 /// <param name="scorer">Pairwise scorer handed to the multiple alignment algorithm.</param>
 /// <param name="words">Words to align; the sequence is copied into an array before use.</param>
 public MultipleWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, IEnumerable<Word> words)
     : base(wordAligner)
 {
     // Materialize the sequence once so the stored list is independent of the caller's enumerable.
     Word[] snapshot = words.ToArray();
     _words = new ReadOnlyList<Word>(snapshot);

     // The field is assigned before Compute() runs, exactly as in the one-step form.
     var algorithm = new MultipleAlignmentAlgorithm<Word, ShapeNode>(scorer, _words, GetNodes);
     _algorithm = algorithm;
     algorithm.Compute();
 }
示例#2
0
        /// <summary>
        /// Recomputes the selection flag of every aligned node in <paramref name="wordPairs"/> against the
        /// currently selected sound change, and rebuilds the list of word pairs that contain at least one
        /// selected node.
        /// </summary>
        /// <param name="wordPairs">The word pair collection whose nodes and selected list are updated.</param>
        private void UpdateSelectedChangeWordPairs(WordPairsViewModel wordPairs)
        {
            IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            wordPairs.SelectedCorrespondenceWordPairs.Clear();
            foreach (WordPairViewModel pairVm in wordPairs.WordPairs)
            {
                bool hasSelectedNode = false;
                foreach (AlignedNodeViewModel alignedNode in pairVm.AlignedNodes)
                {
                    if (_selectedSoundChange == null)
                    {
                        // No sound change is selected, so nothing can match.
                        alignedNode.IsSelected = false;
                        continue;
                    }

                    SoundContext context = pairVm.DomainAlignment.ToSoundContext(_segmentPool, 0, alignedNode.Column, aligner.ContextualSoundClasses);
                    Ngram<Segment> correspondence = pairVm.DomainAlignment[1, alignedNode.Column].ToNgram(_segmentPool);

                    // A node is selected only when both the left-hand context and the correspondence match.
                    alignedNode.IsSelected = context.Equals(_selectedSoundChange.DomainSoundChangeLhs)
                                             && correspondence.Equals(_selectedSoundChange.DomainCorrespondence);
                    if (alignedNode.IsSelected)
                    {
                        hasSelectedNode = true;
                    }
                }

                if (hasSelectedNode)
                {
                    wordPairs.SelectedCorrespondenceWordPairs.Add(pairVm);
                }
            }
        }
示例#3
0
        /// <summary>
        /// Creates the view model for a word pair: wraps its meaning and varieties, computes the alignment
        /// with <paramref name="aligner"/>, and builds one node view model per alignment column plus the
        /// prefix and suffix nodes.
        /// </summary>
        /// <param name="aligner">Word aligner used to compute the alignment; only the first alignment returned is used.</param>
        /// <param name="wordPair">The word pair being presented.</param>
        /// <param name="areVarietiesInOrder">Stored in a field of this view model.</param>
        public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
        {
            _wordPair = wordPair;
            _areVarietiesInOrder = areVarietiesInOrder;
            _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
            _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
            _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

            IWordAlignerResult results = aligner.Compute(_wordPair);
            _alignment = results.GetAlignments().First();
            _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

            var nodes = new List<AlignedNodeViewModel>();
            for (int column = 0; column < _alignment.ColumnCount; column++)
            {
                // Notes are looked up by column position; columns past the last stored note get none.
                // (The former separate counter always equaled the column index, so it was dropped.)
                string note = null;
                if (column < _wordPair.AlignmentNotes.Count)
                {
                    note = _wordPair.AlignmentNotes[column];
                }
                nodes.Add(new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note));
            }
            _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);

            _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(nodes);

            _showInMultipleWordAlignmentCommand = new RelayCommand(ShowInMultipleWordAlignment);
        }
示例#4
0
 /// <summary>
 /// Initializes a multiple word alignment result and computes the alignment right away.
 /// </summary>
 /// <param name="wordAligner">Aligner passed to the base class.</param>
 /// <param name="scorer">Scorer used by the multiple alignment algorithm.</param>
 /// <param name="words">The words to align; copied into an array on construction.</param>
 public MultipleWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, IEnumerable<Word> words)
     : base(wordAligner)
 {
     Word[] wordArray = words.ToArray();
     _words = new ReadOnlyList<Word>(wordArray);

     // Build the algorithm over the stored list, then run it as part of construction.
     _algorithm = new MultipleAlignmentAlgorithm<Word, ShapeNode>(
         scorer,
         _words,
         GetNodes);
     _algorithm.Compute();
 }
示例#5
0
        /// <summary>
        /// Creates the view model for a word pair: wraps its meaning and varieties, aligns the pair with
        /// <paramref name="aligner"/>, and exposes the prefix node, one node per alignment column, and
        /// the suffix node.
        /// </summary>
        /// <param name="aligner">Word aligner used to compute the alignment; only the first alignment returned is used.</param>
        /// <param name="wordPair">The word pair being presented.</param>
        /// <param name="areVarietiesInOrder">Stored in a field of this view model.</param>
        public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
        {
            _wordPair = wordPair;
            _areVarietiesInOrder = areVarietiesInOrder;
            _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
            _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
            _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

            IWordAlignerResult results = aligner.Compute(_wordPair);

            _alignment = results.GetAlignments().First();
            _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

            var nodes = new List<AlignedNodeViewModel>();
            for (int column = 0; column < _alignment.ColumnCount; column++)
            {
                // The note for a column sits at the same index in AlignmentNotes; the previous
                // standalone counter mirrored the column index exactly, so the index is used directly.
                string note = null;
                if (column < _wordPair.AlignmentNotes.Count)
                {
                    note = _wordPair.AlignmentNotes[column];
                }
                nodes.Add(new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note));
            }
            _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);

            _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(nodes);
        }
示例#6
0
        /// <summary>
        /// Converts this surrogate into a <see cref="SoundContext"/> by resolving the environment names
        /// against the primary word aligner's contextual sound classes and the target strings against
        /// <paramref name="segmentPool"/>.
        /// </summary>
        /// <param name="project">Project that supplies the primary word aligner.</param>
        /// <param name="segmentPool">Pool used to look up the existing segment for each target entry.</param>
        /// <returns>The sound context; an environment is null when the corresponding name is null.</returns>
        public SoundContext ToSoundContext(CogProject project, SegmentPool segmentPool)
        {
            IWordAligner aligner = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            // First(...) throws when a named environment has no matching sound class.
            SoundClass leftEnv = null;
            if (LeftEnvironment != null)
            {
                leftEnv = aligner.ContextualSoundClasses.First(sc => sc.Name == LeftEnvironment);
            }

            SoundClass rightEnv = null;
            if (RightEnvironment != null)
            {
                rightEnv = aligner.ContextualSoundClasses.First(sc => sc.Name == RightEnvironment);
            }

            var target = new Ngram<Segment>(_target.Select(segmentPool.GetExisting));
            return new SoundContext(leftEnv, target, rightEnv);
        }
示例#7
0
 /// <summary>
 /// Adds one observation per alignment column to <paramref name="cognateCorrCounts"/>: the sound
 /// context taken from row 0 is the condition and the n-gram from row 1 is the sample.
 /// </summary>
 /// <param name="aligner">Supplies the contextual sound classes used to build each sound context.</param>
 /// <param name="cognateCorrCounts">Frequency distribution that is incremented in place.</param>
 /// <param name="alignment">The alignment whose columns are counted.</param>
 private void UpdateCognateCorrespondenceCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> cognateCorrCounts,
     Alignment<Word, ShapeNode> alignment)
 {
     int columnIndex = 0;
     while (columnIndex < alignment.ColumnCount)
     {
         SoundContext context = alignment.ToSoundContext(_segmentPool, 0, columnIndex, aligner.ContextualSoundClasses);
         Ngram<Segment> correspondence = alignment[1, columnIndex].ToNgram(_segmentPool);
         cognateCorrCounts[context].Increment(correspondence);

         columnIndex++;
     }
 }
示例#8
0
        /// <summary>
        /// Rebuilds a <see cref="VarietyPair"/> from this surrogate: copies the similarity scores, converts
        /// the word pair surrogates, restores the sound change frequency and probability distributions,
        /// and fills the per-position sound correspondence collections.
        /// </summary>
        /// <param name="segmentPool">Pool used to resolve segment strings to existing segments.</param>
        /// <param name="project">Project that supplies the varieties and the primary word aligner.</param>
        /// <returns>The reconstructed variety pair.</returns>
        public VarietyPair ToVarietyPair(SegmentPool segmentPool, CogProject project)
        {
            var vp = new VarietyPair(project.Varieties[Variety1], project.Varieties[Variety2]);
            vp.PhoneticSimilarityScore = PhoneticSimilarityScore;
            vp.LexicalSimilarityScore = LexicalSimilarityScore;
            vp.DefaultCorrespondenceProbability = DefaultCorrespondenceProbability;

            // Surrogate -> word pair map; presumably GetValue is a get-or-add helper (confirm). The same
            // map is handed to the sound correspondence conversion at the end of this method.
            var wordPairs = new Dictionary<WordPairSurrogate, WordPair>();
            vp.WordPairs.AddRange(
                _wordPairs.Select(surrogate => wordPairs.GetValue(surrogate, () => surrogate.ToWordPair(project, vp))));

            var soundChanges = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            foreach (KeyValuePair<SoundContextSurrogate, Tuple<string[], int>[]> entry in _soundChanges)
            {
                SoundContext context = entry.Key.ToSoundContext(project, segmentPool);
                FrequencyDistribution<Ngram<Segment>> contextCounts = soundChanges[context];
                foreach (Tuple<string[], int> sample in entry.Value)
                {
                    // A null string array stands for the empty n-gram.
                    Ngram<Segment> corr;
                    if (sample.Item1 == null)
                    {
                        corr = new Ngram<Segment>();
                    }
                    else
                    {
                        corr = new Ngram<Segment>(sample.Item1.Select(segmentPool.GetExisting));
                    }
                    contextCounts.Increment(corr, sample.Item2);
                }
            }
            vp.SoundChangeFrequencyDistribution = soundChanges;

            IWordAligner aligner = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
            int segmentCount = vp.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;
            int possCorrCount;
            if (aligner.ExpansionCompressionEnabled)
            {
                possCorrCount = (segmentCount * segmentCount) + segmentCount + 1;
            }
            else
            {
                possCorrCount = segmentCount + 1;
            }

            vp.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
                soundChanges,
                (sc, fd) => new WittenBellProbabilityDistribution<Ngram<Segment>>(fd, possCorrCount));

            foreach (KeyValuePair<string, List<SoundCorrespondenceSurrogate>> collection in _soundCorrespondenceCollections)
            {
                if (collection.Value == null)
                {
                    continue;
                }

                // NOTE(review): a key other than "onset"/"nucleus"/"coda" leaves pos null, and null is
                // then used as the lookup key below — confirm that such keys cannot occur.
                FeatureSymbol pos = null;
                if (collection.Key == "onset")
                {
                    pos = CogFeatureSystem.Onset;
                }
                else if (collection.Key == "nucleus")
                {
                    pos = CogFeatureSystem.Nucleus;
                }
                else if (collection.Key == "coda")
                {
                    pos = CogFeatureSystem.Coda;
                }

                vp.SoundCorrespondenceCollections[pos].AddRange(
                    collection.Value.Select(surrogate => surrogate.ToSoundCorrespondence(segmentPool, wordPairs)));
            }

            return vp;
        }
示例#9
0
 /// <summary>
 /// Creates the result of aligning two words and runs the pairwise alignment immediately.
 /// </summary>
 /// <param name="wordAligner">Aligner forwarded to the base result.</param>
 /// <param name="scorer">Scorer handed to the pairwise alignment algorithm.</param>
 /// <param name="settings">Supplies the expansion/compression flag and the alignment mode.</param>
 /// <param name="word1">First word of the pair.</param>
 /// <param name="word2">Second word of the pair.</param>
 public PairwiseWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, WordPairAlignerSettings settings, Word word1, Word word2)
     : base(wordAligner)
 {
     Word[] pair = { word1, word2 };
     _words = new ReadOnlyList<Word>(pair);

     // Configure the algorithm fully before publishing it to the field, then compute.
     var algorithm = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
     algorithm.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
     algorithm.Mode = settings.Mode;
     _algorithm = algorithm;

     _algorithm.Compute();
 }
示例#10
0
 /// <summary>
 /// Serializes an <c>SCAAlign</c> aligner into <paramref name="elem"/>: first its settings, then a
 /// "RelevantFeatures" element with one "RelevantFeature" child per weighted feature and one
 /// "RelevantValue" grandchild per possible symbol of that feature.
 /// </summary>
 /// <param name="component">The aligner to save; it is cast to <c>SCAAlign</c>.</param>
 /// <param name="elem">The XML element that receives the configuration.</param>
 public override void Save(IWordAligner component, XElement elem)
 {
     var scaAlign = (SCAAlign) component;
     SaveSettings(scaAlign.Settings, elem);

     var relevantFeatures = new XElement(ConfigManager.Cog + "RelevantFeatures");
     foreach (var weightedFeature in scaAlign.FeatureWeights)
     {
         var featureElem = new XElement(ConfigManager.Cog + "RelevantFeature",
             new XAttribute("ref", weightedFeature.Key.ID),
             new XAttribute("weight", weightedFeature.Value),
             new XAttribute("vowel", scaAlign.RelevantVowelFeatures.Contains(weightedFeature.Key)),
             new XAttribute("consonant", scaAlign.RelevantConsonantFeatures.Contains(weightedFeature.Key)));

         foreach (var symbol in weightedFeature.Key.PossibleSymbols)
         {
             featureElem.Add(new XElement(ConfigManager.Cog + "RelevantValue",
                 new XAttribute("ref", symbol.ID),
                 new XAttribute("metric", scaAlign.ValueMetrics[symbol])));
         }

         relevantFeatures.Add(featureElem);
     }

     // Attach the finished subtree in one step so elem is untouched if building it fails.
     elem.Add(relevantFeatures);
 }
示例#11
0
 /// <summary>
 /// Initializes a pairwise word alignment result for two words and computes the alignment.
 /// </summary>
 /// <param name="wordAligner">Aligner passed to the base class.</param>
 /// <param name="scorer">Scorer used by the pairwise alignment algorithm.</param>
 /// <param name="settings">Source of the expansion/compression flag and the alignment mode.</param>
 /// <param name="word1">First word to align.</param>
 /// <param name="word2">Second word to align.</param>
 public PairwiseWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, WordPairAlignerSettings settings, Word word1, Word word2)
     : base(wordAligner)
 {
     _words = new ReadOnlyList<Word>(new Word[] { word1, word2 });

     var pairwise = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
     // Copy the aligner settings onto the algorithm before it becomes visible through the field.
     pairwise.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
     pairwise.Mode = settings.Mode;

     _algorithm = pairwise;
     _algorithm.Compute();
 }
示例#12
0
        /// <summary>
        /// Serializes an <c>Aline</c> aligner into <paramref name="elem"/>: first its settings, then a
        /// "RelevantFeatures" element describing each weighted feature and the metric of each of its
        /// possible symbols.
        /// </summary>
        /// <param name="component">The aligner to save; it is cast to <c>Aline</c>.</param>
        /// <param name="elem">The XML element that receives the configuration.</param>
        public override void Save(IWordAligner component, XElement elem)
        {
            var aline = (Aline)component;

            SaveSettings(aline.Settings, elem);

            // Both projections are lazy; they are enumerated when the XElement constructors consume them.
            var featureElems = aline.FeatureWeights.Select(kvp =>
            {
                var valueElems = kvp.Key.PossibleSymbols.Select(fs =>
                    new XElement(ConfigManager.Cog + "RelevantValue",
                        new XAttribute("ref", fs.ID),
                        new XAttribute("metric", aline.ValueMetrics[fs])));

                return new XElement(ConfigManager.Cog + "RelevantFeature",
                    new XAttribute("ref", kvp.Key.ID),
                    new XAttribute("weight", kvp.Value),
                    new XAttribute("vowel", aline.RelevantVowelFeatures.Contains(kvp.Key)),
                    new XAttribute("consonant", aline.RelevantConsonantFeatures.Contains(kvp.Key)),
                    valueElems);
            });

            elem.Add(new XElement(ConfigManager.Cog + "RelevantFeatures", featureElems));
        }
示例#13
0
        /// <summary>
        /// Builds a fresh conditional probability distribution from the pair's cognate sound correspondence
        /// counts and compares it with the distribution currently stored on the pair.
        /// </summary>
        /// <param name="pair">The variety pair to update.</param>
        /// <returns>
        /// True when the stored distribution has the same conditions and samples as the new one and no
        /// probability differs by more than 0.0001; otherwise false, in which case the new distribution
        /// and the default correspondence probability are written to <paramref name="pair"/>.
        /// </returns>
        private bool M(VarietyPair pair)
        {
            IWordAligner aligner = _project.WordAligners[AlignerId];
            int segmentCount = pair.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;
            int possCorrCount = aligner.ExpansionCompressionEnabled
                ? (segmentCount * segmentCount) + segmentCount + 1
                : segmentCount + 1;
            var updated = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
                pair.CognateSoundCorrespondenceFrequencyDistribution,
                (sc, fd) => new WittenBellProbabilityDistribution<Ngram<Segment>>(fd, possCorrCount));

            // Start from the cheap structural check; only compare probabilities when it passes.
            bool converged = pair.CognateSoundCorrespondenceProbabilityDistribution != null
                             && pair.CognateSoundCorrespondenceProbabilityDistribution.Conditions.Count == updated.Conditions.Count;

            if (converged)
            {
                foreach (SoundContext context in updated.Conditions)
                {
                    IProbabilityDistribution<Ngram<Segment>> newDist = updated[context];
                    IProbabilityDistribution<Ngram<Segment>> previousDist;
                    bool comparable = pair.CognateSoundCorrespondenceProbabilityDistribution.TryGetProbabilityDistribution(context, out previousDist)
                                      && newDist.Samples.Count == previousDist.Samples.Count;
                    if (!comparable)
                    {
                        converged = false;
                        break;
                    }

                    foreach (Ngram<Segment> sample in newDist.Samples)
                    {
                        if (Math.Abs(newDist[sample] - previousDist[sample]) > 0.0001)
                        {
                            converged = false;
                            break;
                        }
                    }

                    // Propagate the inner loop's early exit.
                    if (!converged)
                    {
                        break;
                    }
                }
            }

            if (converged)
            {
                return true;
            }

            pair.CognateSoundCorrespondenceProbabilityDistribution = updated;
            pair.DefaultSoundCorrespondenceProbability = 1.0 / possCorrCount;
            return false;
        }
示例#14
0
        /// <summary>
        /// Collects single-segment sound correspondences from the cognate word pairs of
        /// <paramref name="data"/>, grouped by syllable position (onset, nucleus, coda), and replaces
        /// the pair's per-position correspondence collections with the result.
        /// </summary>
        /// <param name="data">The variety pair whose cognate word pairs are aligned and scanned.</param>
        public void Process(VarietyPair data)
        {
            IWordAligner aligner = _project.WordAligners[_alignerId];

            var byPosition = new Dictionary<FeatureSymbol, SoundCorrespondenceCollection>();
            byPosition.Add(CogFeatureSystem.Onset, new SoundCorrespondenceCollection());
            byPosition.Add(CogFeatureSystem.Nucleus, new SoundCorrespondenceCollection());
            byPosition.Add(CogFeatureSystem.Coda, new SoundCorrespondenceCollection());

            foreach (WordPair cognatePair in data.WordPairs.Where(wp => wp.Cognacy))
            {
                Alignment<Word, ShapeNode> alignment = aligner.Compute(cognatePair).GetAlignments().First();
                for (int column = 0; column < alignment.ColumnCount; column++)
                {
                    AlignmentCell<ShapeNode> cell1 = alignment[0, column];
                    AlignmentCell<ShapeNode> cell2 = alignment[1, column];

                    // Only columns where both sides hold exactly one node are considered.
                    if (cell1.IsNull || cell2.IsNull || cell1.Count != 1 || cell2.Count != 1)
                    {
                        continue;
                    }

                    // Both nodes must carry a syllable position, and it must be the same one.
                    SymbolicFeatureValue pos1, pos2;
                    if (!cell1.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos1)
                        || !cell2.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos2)
                        || !((FeatureSymbol)pos1 == (FeatureSymbol)pos2))
                    {
                        continue;
                    }

                    Ngram<Segment> ngram1 = cell1.ToNgram(_segmentPool);
                    Ngram<Segment> ngram2 = cell2.ToNgram(_segmentPool);
                    Segment seg1 = ngram1.First;
                    Segment seg2 = ngram2.First;

                    // Identical segments are not a correspondence worth recording.
                    if (seg1.Equals(seg2))
                    {
                        continue;
                    }

                    SoundCorrespondenceCollection positionColl = byPosition[(FeatureSymbol)pos1];
                    SoundCorrespondence corr;
                    if (!positionColl.TryGet(seg1, seg2, out corr))
                    {
                        corr = new SoundCorrespondence(seg1, seg2);
                        positionColl.Add(corr);
                    }
                    corr.Frequency++;
                    corr.WordPairs.Add(cognatePair);
                }
            }

            foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in byPosition)
            {
                data.CognateSoundCorrespondencesByPosition[entry.Key].ReplaceAll(entry.Value);
            }
        }
示例#15
0
文件: AlineConfig.cs 项目: rmunn/cog
 /// <summary>
 /// Serializes an <c>Aline</c> aligner into <paramref name="elem"/>: its settings, a
 /// "RelevantFeatures" element (one "RelevantFeature" per weighted feature, each with one
 /// "RelevantValue" per possible symbol), and the two scoring flags.
 /// </summary>
 /// <param name="component">The aligner to save; it is cast to <c>Aline</c>.</param>
 /// <param name="elem">The XML element that receives the configuration.</param>
 public override void Save(IWordAligner component, XElement elem)
 {
     var aline = (Aline) component;
     SaveSettings(aline.Settings, elem);

     var featuresElem = new XElement(ConfigManager.Cog + "RelevantFeatures");
     foreach (var entry in aline.FeatureWeights)
     {
         var featureElem = new XElement(ConfigManager.Cog + "RelevantFeature",
             new XAttribute("ref", entry.Key.ID),
             new XAttribute("weight", entry.Value),
             new XAttribute("vowel", aline.RelevantVowelFeatures.Contains(entry.Key)),
             new XAttribute("consonant", aline.RelevantConsonantFeatures.Contains(entry.Key)));

         foreach (var symbol in entry.Key.PossibleSymbols)
         {
             featureElem.Add(new XElement(ConfigManager.Cog + "RelevantValue",
                 new XAttribute("ref", symbol.ID),
                 new XAttribute("metric", aline.ValueMetrics[symbol])));
         }

         featuresElem.Add(featureElem);
     }
     // The subtree is attached only once it is complete.
     elem.Add(featuresElem);

     elem.Add(new XElement(ConfigManager.Cog + "SoundChangeScoringEnabled", aline.Settings.SoundChangeScoringEnabled));
     elem.Add(new XElement(ConfigManager.Cog + "SyllablePositionCostEnabled", aline.Settings.SyllablePositionCostEnabled));
 }
示例#16
0
        /// <summary>
        /// Adds one observation per alignment column to <paramref name="counts"/>, unless the alignment's
        /// normalized score is below the initial alignment threshold, in which case nothing is counted.
        /// </summary>
        /// <param name="aligner">Supplies the contextual sound classes used to build each sound context.</param>
        /// <param name="counts">Frequency distribution that is incremented in place.</param>
        /// <param name="alignment">The alignment whose columns are counted.</param>
        private void UpdateCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> counts, Alignment<Word, ShapeNode> alignment)
        {
            // Alignments scoring below the threshold contribute nothing.
            if (alignment.NormalizedScore < _initialAlignmentThreshold)
            {
                return;
            }

            int columnIndex = 0;
            while (columnIndex < alignment.ColumnCount)
            {
                // Row 0 provides the conditioning context, row 1 the corresponding n-gram.
                SoundContext context = alignment.ToSoundContext(_segmentPool, 0, columnIndex, aligner.ContextualSoundClasses);
                Ngram<Segment> correspondence = alignment[1, columnIndex].ToNgram(_segmentPool);
                counts[context].Increment(correspondence);

                columnIndex++;
            }
        }
示例#17
0
        /// <summary>
        /// Reads lines of two space-separated words, aligns each pair with the "primary" word aligner and
        /// writes both words followed by the raw or normalized alignment score. In verbose mode the
        /// alignment itself is written as well.
        /// </summary>
        /// <param name="inputReader">Source of the input lines.</param>
        /// <param name="outputWriter">Receives one result line per successfully parsed input line.</param>
        /// <param name="errorWriter">Not used directly by this method.</param>
        /// <returns>Always <c>ReturnCode.Okay</c>; line-level problems are recorded in <c>Errors</c>.</returns>
        protected override ReturnCode DoWork(TextReader inputReader, TextWriter outputWriter, TextWriter errorWriter)
        {
            // Exactly one score kind must be selected; both or neither falls back to normalized scores.
            if (RawScores == NormalizedScores)
            {
                Warnings.Add(RawScores
                    ? "Please specify either raw or normalized scores, but not both. Defaulting to normalized."
                    : "Neither raw scores nor normalized scores were selected. Defaulting to normalized.");
                RawScores = false;
                NormalizedScores = true;
            }

            SetupProject();
            Meaning meaning = MeaningFactory.Create();

            IWordAligner wordAligner = Project.WordAligners["primary"];

            foreach (string line in ReadLines(inputReader))
            {
                string[] parts = line.Split(' ');
                if (parts.Length != 2)
                {
                    Errors.Add(line, "Each line should have two space-separated words in it.");
                    continue;
                }

                Word[] words = parts.Select(wordText => ParseWordOnce(wordText, meaning, Project)).ToArray();
                bool allParsed = words.Length == 2 && words.All(w => w != null);
                if (!allParsed)
                {
                    Errors.Add(line, "One or more of this line's words failed to parse. Successfully parsed words: {0}",
                        string.Join(", ", words.Where(w => w != null).Select(w => w.StrRep)));
                    continue;
                }

                Word first = words[0];
                Word second = words[1];
                IWordAlignerResult result = wordAligner.Compute(first, second);
                Alignment<Word, ShapeNode> alignment = result.GetAlignments().First();

                outputWriter.WriteLine("{0} {1} {2}", first.StrRep, second.StrRep,
                    RawScores ? alignment.RawScore : alignment.NormalizedScore);

                if (Verbose)
                {
                    outputWriter.Write(alignment.ToString(Enumerable.Empty<string>()));
                    outputWriter.WriteLine();
                }
            }

            return ReturnCode.Okay;
        }
示例#18
0
        /// <summary>
        /// Regenerates the word pairs of <paramref name="varietyPair"/>. For every meaning of the first
        /// variety, the non-empty words of both varieties are aligned; when there are several candidates
        /// the combination with the highest normalized score is kept. The sound change counts gathered
        /// from the kept alignments are stored on the pair.
        /// </summary>
        /// <param name="varietyPair">The variety pair whose word pairs and sound change counts are rebuilt.</param>
        public void Process(VarietyPair varietyPair)
        {
            IWordAligner aligner = _project.WordAligners[_alignerID];

            varietyPair.WordPairs.Clear();
            var counts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

            foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
            {
                // Words with an empty shape cannot be aligned and are ignored.
                Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
                Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();

                if (candidates1.Length == 0 || candidates2.Length == 0)
                {
                    continue;
                }

                if (candidates1.Length == 1 && candidates2.Length == 1)
                {
                    // Single candidate on each side: pair them directly.
                    WordPair wp = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
                    Alignment<Word, ShapeNode> onlyAlignment = aligner.Compute(wp).GetAlignments().First();
                    wp.PhoneticSimilarityScore = onlyAlignment.NormalizedScore;
                    UpdateCounts(aligner, counts, onlyAlignment);
                    continue;
                }

                // Several candidates: try every combination and keep the best-scoring one.
                WordPair bestWordPair = null;
                Alignment<Word, ShapeNode> bestAlignment = null;
                foreach (Word candidate1 in candidates1)
                {
                    foreach (Word candidate2 in candidates2)
                    {
                        Alignment<Word, ShapeNode> candidateAlignment = aligner.Compute(candidate1, candidate2).GetAlignments().First();
                        double score = candidateAlignment.NormalizedScore;
                        if (bestWordPair == null || score > bestWordPair.PhoneticSimilarityScore)
                        {
                            bestWordPair = new WordPair(candidate1, candidate2);
                            bestWordPair.PhoneticSimilarityScore = score;
                            bestAlignment = candidateAlignment;
                        }
                    }
                }

                varietyPair.WordPairs.Add(bestWordPair);
                UpdateCounts(aligner, counts, bestAlignment);
            }

            varietyPair.SoundChangeFrequencyDistribution = counts;
        }
示例#19
0
 /// <summary>
 /// Writes each word pair, ordered by descending phonetic similarity, as a gloss line (with the
 /// category in parentheses when present), the alignment text and a similarity line. Entries are
 /// separated by one blank line.
 /// </summary>
 /// <param name="writer">Destination of the text.</param>
 /// <param name="aligner">Aligner used to recompute the alignment of each pair.</param>
 /// <param name="wordPairs">The word pairs to write.</param>
 private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
 {
     bool needsSeparator = false;
     foreach (WordPair wordPair in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
     {
         // A blank line goes between entries, not before the first one.
         if (needsSeparator)
         {
             writer.WriteLine();
         }

         Alignment<Word, ShapeNode> alignment = aligner.Compute(wordPair).GetAlignments().First();

         writer.Write(wordPair.Word1.Meaning.Gloss);
         if (!string.IsNullOrEmpty(wordPair.Word1.Meaning.Category))
         {
             writer.Write(" ({0})", wordPair.Word1.Meaning.Category);
         }
         writer.WriteLine();

         writer.Write(alignment.ToString(wordPair.AlignmentNotes));
         writer.WriteLine("Similarity: {0:p}", wordPair.PhoneticSimilarityScore);

         needsSeparator = true;
     }
 }
示例#20
0
        /// <summary>
        /// Writes a plain-text report for <paramref name="varietyPair"/> to <paramref name="stream"/>:
        /// the similarity scores, the likely cognates, the likely non-cognates, and for every sound
        /// context the observed correspondences ordered by descending probability.
        /// </summary>
        /// <param name="stream">Destination stream; it is wrapped in a <c>NonClosingStreamWrapper</c> before the writer is created.</param>
        /// <param name="aligner">Aligner used when writing the word pairs.</param>
        /// <param name="varietyPair">The variety pair to report on.</param>
        public void Export(Stream stream, IWordAligner aligner, VarietyPair varietyPair)
        {
            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                writer.WriteLine("Similarity");
                writer.WriteLine("----------");
                writer.WriteLine("Lexical: {0:p}", varietyPair.LexicalSimilarityScore);
                writer.WriteLine("Phonetic: {0:p}", varietyPair.PhoneticSimilarityScore);
                writer.WriteLine();

                writer.WriteLine("Likely cognates");
                writer.WriteLine("--------------");
                var cognates = varietyPair.WordPairs.Where(wp => wp.Cognacy);
                WriteWordPairs(writer, aligner, cognates);
                writer.WriteLine();

                writer.WriteLine("Likely non-cognates");
                writer.WriteLine("-------------------");
                var nonCognates = varietyPair.WordPairs.Where(wp => !wp.Cognacy);
                WriteWordPairs(writer, aligner, nonCognates);
                writer.WriteLine();

                writer.WriteLine("Sound correspondences");
                writer.WriteLine("---------------------");
                bool needsSeparator = false;
                foreach (SoundContext context in varietyPair.CognateSoundCorrespondenceProbabilityDistribution.Conditions)
                {
                    // Blank line between contexts, none before the first.
                    if (needsSeparator)
                    {
                        writer.WriteLine();
                    }

                    IProbabilityDistribution<Ngram<Segment>> probabilities = varietyPair.CognateSoundCorrespondenceProbabilityDistribution[context];
                    FrequencyDistribution<Ngram<Segment>> frequencies = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[context];
                    writer.WriteLine(context.ToString());

                    // Project each observed sample to its probability and frequency, most probable first.
                    var rows = frequencies.ObservedSamples
                        .Select(sample => new { Segment = sample, Probability = probabilities[sample], Frequency = frequencies[sample] })
                        .OrderByDescending(row => row.Probability);
                    foreach (var row in rows)
                    {
                        writer.WriteLine("{0}: {1:p}, {2}", row.Segment, row.Probability, row.Frequency);
                    }

                    needsSeparator = true;
                }
            }
        }
示例#21
0
        /// <summary>
        /// Aligns every word pair of <paramref name="pair"/>, updates each pair's predicted cognacy and
        /// phonetic similarity score, counts the sound correspondences of the cognate pairs, and stores
        /// the cognate count, the counts and the averaged similarity scores on the variety pair.
        /// </summary>
        /// <param name="pair">The variety pair to update.</param>
        private void E(VarietyPair pair)
        {
            ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[CognateIdentifierId];
            var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            IWordAligner aligner = _project.WordAligners[AlignerId];

            int cognates = 0;
            double scoreSum = 0;

            foreach (WordPair wordPair in pair.WordPairs)
            {
                IWordAlignerResult alignerResult = aligner.Compute(wordPair);
                cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

                // Only pairs flagged as cognates contribute to the correspondence counts.
                if (wordPair.Cognacy)
                {
                    int column = 0;
                    while (column < alignment.ColumnCount)
                    {
                        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
                        Ngram<Segment> correspondence = alignment[1, column].ToNgram(_segmentPool);
                        correspondenceCounts[context].Increment(correspondence);
                        column++;
                    }
                    cognates++;
                }

                wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
                scoreSum += wordPair.PhoneticSimilarityScore;
            }

            pair.CognateCount = cognates;
            pair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;

            // Avoid dividing by zero when the pair has no word pairs at all.
            if (pair.WordPairs.Count == 0)
            {
                pair.LexicalSimilarityScore = 0;
                pair.PhoneticSimilarityScore = 0;
                return;
            }

            pair.LexicalSimilarityScore = (double)cognates / pair.WordPairs.Count;
            pair.PhoneticSimilarityScore = scoreSum / pair.WordPairs.Count;
        }
示例#22
0
        /// <summary>
        /// Reports whether any segment of <paramref name="target1"/> and any segment of
        /// <paramref name="target2"/> have an aligner delta that does not exceed the threshold.
        /// The surrounding nodes are not consulted by this implementation.
        /// </summary>
        /// <returns>
        /// False when the threshold is zero or either target is empty; otherwise true as soon as one
        /// segment combination is within the threshold, and false when none is.
        /// </returns>
        public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
        {
            // A zero threshold maps nothing.
            if (_threshold == 0)
            {
                return false;
            }

            // Neither can an empty target on either side.
            if (target1.Length == 0 || target2.Length == 0)
            {
                return false;
            }

            IWordAligner aligner = _project.WordAligners[_alignerID];

            bool mapped = false;
            foreach (Segment first in target1)
            {
                foreach (Segment second in target2)
                {
                    if (aligner.Delta(first.FeatureStruct, second.FeatureStruct) <= _threshold)
                    {
                        mapped = true;
                        break;
                    }
                }

                if (mapped)
                {
                    break;
                }
            }

            return mapped;
        }
示例#23
0
        /// <summary>
        /// Creates the view model for a variety pair: splits its word pairs into predicted cognates and
        /// non-cognates, and builds one sound change view model for every (context, sample) combination
        /// of the pair's sound change probability distribution.
        /// </summary>
        /// <param name="segmentPool">Stored in a field of this view model.</param>
        /// <param name="projectService">Stored in a field; also supplies the primary word aligner.</param>
        /// <param name="wordPairsFactory">Invoked twice, once for the cognate list and once for the non-cognate list.</param>
        /// <param name="varietyPair">The variety pair being presented.</param>
        /// <param name="areVarietiesInOrder">Stored in a field and forwarded to each word pair view model.</param>
        public VarietyPairViewModel(SegmentPool segmentPool, IProjectService projectService, WordPairsViewModel.Factory wordPairsFactory, VarietyPair varietyPair, bool areVarietiesInOrder)
        {
            _segmentPool = segmentPool;
            _projectService = projectService;
            _varietyPair = varietyPair;
            _areVarietiesInOrder = areVarietiesInOrder;

            IWordAligner aligner = projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            _cognates = wordPairsFactory();
            foreach (WordPair cognate in _varietyPair.WordPairs.Where(pair => pair.AreCognatePredicted))
            {
                _cognates.WordPairs.Add(new WordPairViewModel(aligner, cognate, _areVarietiesInOrder));
            }

            _noncognates = wordPairsFactory();
            foreach (WordPair nonCognate in _varietyPair.WordPairs.Where(pair => !pair.AreCognatePredicted))
            {
                _noncognates.WordPairs.Add(new WordPairViewModel(aligner, nonCognate, _areVarietiesInOrder));
            }

            // Flatten the conditional distribution: one entry per context and per sample of that context.
            var soundChanges = new List<SoundChangeViewModel>();
            foreach (var lhs in _varietyPair.SoundChangeProbabilityDistribution.Conditions)
            {
                foreach (var segment in _varietyPair.SoundChangeProbabilityDistribution[lhs].Samples)
                {
                    soundChanges.Add(new SoundChangeViewModel(
                        lhs,
                        segment,
                        _varietyPair.SoundChangeProbabilityDistribution[lhs][segment],
                        _varietyPair.SoundChangeFrequencyDistribution[lhs][segment]));
                }
            }
            _soundChanges = new ReadOnlyList<SoundChangeViewModel>(soundChanges);
        }
示例#24
0
        /// <summary>
        /// Writes the given word pairs in order of descending phonetic similarity. Each entry consists of
        /// the gloss (plus the category in parentheses when it is not empty), the alignment text and a
        /// "Similarity" line; a blank line separates consecutive entries.
        /// </summary>
        /// <param name="writer">Destination of the text.</param>
        /// <param name="aligner">Aligner used to recompute the alignment of each pair.</param>
        /// <param name="wordPairs">The word pairs to write.</param>
        private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
        {
            int written = 0;

            foreach (WordPair current in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
            {
                // Every entry after the first is preceded by an empty line.
                if (written > 0)
                {
                    writer.WriteLine();
                }

                IWordAlignerResult alignerResult = aligner.Compute(current);
                Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

                writer.Write(current.Word1.Meaning.Gloss);
                bool hasCategory = !string.IsNullOrEmpty(current.Word1.Meaning.Category);
                if (hasCategory)
                {
                    writer.Write(" ({0})", current.Word1.Meaning.Category);
                }
                writer.WriteLine();

                writer.Write(alignment.ToString(current.AlignmentNotes));
                writer.WriteLine("Similarity: {0:p}", current.PhoneticSimilarityScore);

                written++;
            }
        }
示例#25
0
        /// <summary>
        /// Writes a plain-text report for a variety pair to <paramref name="stream"/>: the lexical and
        /// phonetic similarity scores, the word pairs whose <c>Cognacy</c> is true, the word pairs whose
        /// <c>Cognacy</c> is false, and finally the cognate sound correspondences grouped by context.
        /// </summary>
        /// <param name="stream">Stream that receives the report.</param>
        /// <param name="aligner">Aligner handed to <c>WriteWordPairs</c> for both word pair sections.</param>
        /// <param name="varietyPair">Variety pair being exported.</param>
        public void Export(Stream stream, IWordAligner aligner, VarietyPair varietyPair)
        {
            // NOTE(review): NonClosingStreamWrapper presumably keeps the caller's stream open when the
            // writer is disposed - confirm against its implementation.
            using (var output = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                output.WriteLine("Similarity");
                output.WriteLine("----------");
                output.WriteLine("Lexical: {0:p}", varietyPair.LexicalSimilarityScore);
                output.WriteLine("Phonetic: {0:p}", varietyPair.PhoneticSimilarityScore);
                output.WriteLine();

                output.WriteLine("Likely cognates");
                output.WriteLine("--------------");
                WriteWordPairs(output, aligner, varietyPair.WordPairs.Where(pair => pair.Cognacy));
                output.WriteLine();

                output.WriteLine("Likely non-cognates");
                output.WriteLine("-------------------");
                WriteWordPairs(output, aligner, varietyPair.WordPairs.Where(pair => !pair.Cognacy));
                output.WriteLine();

                output.WriteLine("Sound correspondences");
                output.WriteLine("---------------------");

                bool needsSeparator = false;
                foreach (SoundContext context in varietyPair.CognateSoundCorrespondenceProbabilityDistribution.Conditions)
                {
                    // Blank line between context groups, but not before the first one.
                    if (needsSeparator)
                    {
                        output.WriteLine();
                    }

                    IProbabilityDistribution<Ngram<Segment>> probabilities = varietyPair.CognateSoundCorrespondenceProbabilityDistribution[context];
                    FrequencyDistribution<Ngram<Segment>> frequencies = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[context];

                    output.WriteLine(context.ToString());

                    // Observed correspondences for this context, most probable first.
                    var ranked = frequencies.ObservedSamples
                        .Select(sample => new { Sample = sample, Probability = probabilities[sample], Count = frequencies[sample] })
                        .OrderByDescending(row => row.Probability);

                    foreach (var row in ranked)
                    {
                        output.WriteLine("{0}: {1:p}, {2}", row.Sample, row.Probability, row.Count);
                    }

                    needsSeparator = true;
                }
            }
        }
示例#26
0
        /// <summary>
        /// Builds the segment mappings table for one sound type. The rows are every distinct observed
        /// segment of that type across the project's varieties (sorted by category, then by segment)
        /// followed by one extra row whose domain segment is null. Every row receives one segment pair
        /// per row of the table, carrying the aligner's delta, an enabled flag, a threshold flag and
        /// the existing mappings that normalize to that pair.
        /// </summary>
        /// <param name="projectService">Gives access to the project's varieties, segmenter and word aligners.</param>
        /// <param name="segmentPairFactory">Creates the view model for one (row, column) cell.</param>
        /// <param name="mappingFactory">Creates a mapping view model from two segment strings.</param>
        /// <param name="mappings">Existing mappings to distribute over the cells.</param>
        /// <param name="soundType">Consonant or vowel; any other value throws.</param>
        /// <param name="threshold">Largest delta for which a cell is flagged as meeting the threshold.</param>
        /// <exception cref="InvalidEnumArgumentException"><paramref name="soundType"/> is neither consonant nor vowel.</exception>
        public SegmentMappingsTableViewModel(IProjectService projectService, SegmentMappingsTableSegmentPairViewModel.Factory segmentPairFactory, SegmentMappingViewModel.Factory mappingFactory,
                                             IEnumerable<SegmentMappingViewModel> mappings, SoundType soundType, int threshold)
        {
            _threshold = threshold;
            _soundType = soundType;

            FeatureSymbol requiredType;
            if (_soundType == SoundType.Consonant)
            {
                requiredType = CogFeatureSystem.ConsonantType;
            }
            else if (_soundType == SoundType.Vowel)
            {
                requiredType = CogFeatureSystem.VowelType;
            }
            else
            {
                throw new InvalidEnumArgumentException();
            }

            var segmentOrder = new SegmentComparer();
            var categoryOrder = new SegmentCategoryComparer();

            // Rows: distinct observed segments of the requested type, sorted, plus a trailing null-segment row.
            var observedSegments = projectService.Project.Varieties.SelectMany(variety => variety.SegmentFrequencyDistribution.ObservedSamples);
            var sortedSegments = observedSegments
                .Where(seg => seg.Type == requiredType)
                .Distinct()
                .OrderBy(seg => seg.Category(), categoryOrder)
                .ThenBy(seg => seg, segmentOrder);
            var rowModels = sortedSegments
                .Select(seg => new SegmentMappingsTableSegmentViewModel(seg, _soundType))
                .Concat(new SegmentMappingsTableSegmentViewModel(null, _soundType));
            _segments = new ReadOnlyList<SegmentMappingsTableSegmentViewModel>(rowModels.ToArray());

            // Category headers; the null-segment row is grouped under the empty category name.
            var categoryGroups = _segments
                .GroupBy(row => row.DomainSegment == null ? string.Empty : row.DomainSegment.Category())
                .OrderBy(grp => grp.Key, categoryOrder);
            _categories = new ReadOnlyList<SegmentCategoryViewModel>(categoryGroups.Select(grp => new SegmentCategoryViewModel(grp.Key, grp)).ToArray());

            // Index the supplied mappings by their normalized (unordered) segment pair.
            var mappingsByPair = new Dictionary<UnorderedTuple<string, string>, HashSet<UnorderedTuple<string, string>>>();
            foreach (SegmentMappingViewModel mapping in mappings)
            {
                string normalized1, normalized2;
                FeatureSymbol leftEnv1, rightEnv1, leftEnv2, rightEnv2;

                // A mapping is indexed only when both of its sides normalize successfully.
                if (!ListSegmentMappings.Normalize(projectService.Project.Segmenter, mapping.Segment1, out normalized1, out leftEnv1, out rightEnv1))
                {
                    continue;
                }
                if (!ListSegmentMappings.Normalize(projectService.Project.Segmenter, mapping.Segment2, out normalized2, out leftEnv2, out rightEnv2))
                {
                    continue;
                }

                UnorderedTuple<string, string> pairKey = UnorderedTuple.Create(normalized1, normalized2);
                HashSet<UnorderedTuple<string, string>> bucket = mappingsByPair.GetOrCreate(pairKey, () => new HashSet<UnorderedTuple<string, string>>());
                bucket.Add(UnorderedTuple.Create(mapping.Segment1, mapping.Segment2));
            }

            IWordAligner primaryAligner = projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            foreach (SegmentMappingsTableSegmentViewModel rowSegment in _segments)
            {
                // Cells are enabled until the column whose domain segment equals the row's is reached;
                // that cell and every later cell in the row are created disabled.
                bool cellEnabled = true;
                foreach (SegmentMappingsTableSegmentViewModel columnSegment in _segments)
                {
                    if (EqualityComparer<Segment>.Default.Equals(rowSegment.DomainSegment, columnSegment.DomainSegment))
                    {
                        cellEnabled = false;
                    }

                    // -1 marks cells that involve the null-segment row or column.
                    int delta;
                    if (rowSegment.DomainSegment == null || columnSegment.DomainSegment == null)
                    {
                        delta = -1;
                    }
                    else
                    {
                        delta = primaryAligner.Delta(rowSegment.DomainSegment.FeatureStruct, columnSegment.DomainSegment.FeatureStruct);
                    }

                    SegmentMappingsTableSegmentPairViewModel cell = segmentPairFactory(rowSegment, columnSegment, delta, cellEnabled);
                    cell.MeetsThreshold = delta != -1 && delta <= _threshold;

                    HashSet<UnorderedTuple<string, string>> existingMappings;
                    if (mappingsByPair.TryGetValue(UnorderedTuple.Create(rowSegment.StrRep, columnSegment.StrRep), out existingMappings))
                    {
                        cell.Mappings.Mappings.AddRange(existingMappings.Select(pair => mappingFactory(pair.Item1, pair.Item2)));
                    }

                    rowSegment.SegmentPairs.Add(cell);
                }
            }
        }
示例#27
0
 /// <summary>
 /// Initializes the base class by storing the supplied word aligner.
 /// </summary>
 /// <param name="wordAligner">Aligner kept in the <c>_wordAligner</c> field; not validated here.</param>
 protected WordAlignerResultBase(IWordAligner wordAligner)
 {
     _wordAligner = wordAligner;
 }
示例#28
0
        /// <summary>
        /// Rebuilds the word pairs of <paramref name="varietyPair"/> and its cognate statistics.
        /// Meanings with exactly one usable word on each side are paired directly. Meanings with
        /// several candidate words are first paired provisionally by highest raw alignment score and
        /// then resolved one by one in a second pass, keeping the candidate that ranks highest
        /// according to <c>Compare</c>.
        /// </summary>
        /// <param name="varietyPair">
        /// Variety pair to process. Its word pairs are cleared and repopulated, and its
        /// <c>CognateCount</c> and <c>CognateSoundCorrespondenceFrequencyDistribution</c> are overwritten.
        /// </param>
        public void Process(VarietyPair varietyPair)
        {
            IWordAligner aligner           = _project.WordAligners[_alignerID];
            // One entry per ambiguous meaning: (meaning, highest-raw-score result, all candidate results).
            var          ambiguousMeanings = new List <Tuple <Meaning, IWordAlignerResult, IWordAlignerResult[]> >();

            varietyPair.WordPairs.Clear();
            // Running totals that only include pairs whose final choice has been made.
            var cognateCorrCounts = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >();
            int cognateCount      = 0;

            // First pass: settle unambiguous meanings, provisionally pair ambiguous ones.
            foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
            {
                // Words with an empty shape are ignored.
                Word[] words1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
                Word[] words2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();

                if (words1.Length == 1 && words2.Length == 1)
                {
                    // Exactly one word on each side: the pairing is final.
                    Word     word1 = words1.Single();
                    Word     word2 = words2.Single();
                    WordPair wp    = varietyPair.WordPairs.Add(word1, word2);
                    _project.CognacyDecisions.UpdateActualCognacy(wp);
                    IWordAlignerResult alignerResult = aligner.Compute(wp);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, alignerResult);
                    Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
                    if (wp.Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, alignment);
                        cognateCount++;
                    }
                    wp.PhoneticSimilarityScore = alignment.NormalizedScore;
                }
                else if (words1.Length > 0 && words2.Length > 0)
                {
                    // Several candidates: align every combination, remember all results for the second
                    // pass, and install the highest-raw-score pairing as a placeholder for now.
                    IWordAlignerResult[] alignerResults   = words1.SelectMany(w1 => words2.Select(w2 => aligner.Compute(w1, w2))).ToArray();
                    IWordAlignerResult   maxAlignerResult = alignerResults.MaxBy(a => a.BestRawScore);
                    ambiguousMeanings.Add(Tuple.Create(meaning, maxAlignerResult, alignerResults));
                    WordPair wp = varietyPair.WordPairs.Add(maxAlignerResult.Words[0], maxAlignerResult.Words[1]);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, maxAlignerResult);
                }
            }

            ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[_cognateIdentifierID];

            // Second pass: resolve the ambiguous meanings in order.
            for (int i = 0; i < ambiguousMeanings.Count; i++)
            {
                // Start from the settled totals and add the placeholder pairs of the meanings that are
                // still unresolved (j > i) when those placeholders are currently marked cognate.
                ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > newCognateCorrCounts = cognateCorrCounts.Clone();
                int newCognateCount = cognateCount;
                for (int j = i + 1; j < ambiguousMeanings.Count; j++)
                {
                    if (varietyPair.WordPairs[ambiguousMeanings[j].Item1].Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, newCognateCorrCounts, ambiguousMeanings[j].Item2.GetAlignments().First());
                        newCognateCount++;
                    }
                }

                IWordAlignerResult bestAlignerResult = null;
                WordPair           bestWordPair      = null;
                // Try every candidate pairing for meaning i.
                foreach (IWordAlignerResult alignerResult in ambiguousMeanings[i].Item3)
                {
                    ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > alignmentCognateCorrCounts = newCognateCorrCounts.Clone();
                    int alignmentCognateCount             = newCognateCount;
                    Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
                    // Swap the pair stored for this meaning for the candidate under evaluation.
                    varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
                    WordPair wordPair = varietyPair.WordPairs.Add(alignerResult.Words[0], alignerResult.Words[1]);
                    _thresholdCognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                    if (wordPair.Cognacy)
                    {
                        UpdateCognateCorrespondenceCounts(aligner, alignmentCognateCorrCounts, alignment);
                        alignmentCognateCount++;
                    }
                    // Temporarily publish the candidate's statistics on the variety pair before the
                    // configured cognate identifier runs.
                    // NOTE(review): presumably the identifier reads these from the variety pair - confirm.
                    varietyPair.CognateCount = alignmentCognateCount;
                    varietyPair.CognateSoundCorrespondenceFrequencyDistribution = alignmentCognateCorrCounts;
                    cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
                    wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
                    // The first candidate wins ties because only a strictly greater comparison replaces it.
                    if (bestWordPair == null || Compare(wordPair, bestWordPair) > 0)
                    {
                        bestWordPair      = wordPair;
                        bestAlignerResult = alignerResult;
                    }
                }

                Debug.Assert(bestWordPair != null);
                // Replace whichever candidate was tried last with the winner and fold it into the settled totals.
                varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
                varietyPair.WordPairs.Add(bestWordPair);
                _project.CognacyDecisions.UpdateActualCognacy(bestWordPair);
                if (bestWordPair.Cognacy)
                {
                    UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, bestAlignerResult.GetAlignments().First());
                    cognateCount++;
                }
            }

            // Overwrite the temporary values written during candidate evaluation with the final totals.
            varietyPair.CognateCount = cognateCount;
            varietyPair.CognateSoundCorrespondenceFrequencyDistribution = cognateCorrCounts;
        }
示例#29
0
        /// <summary>
        /// Rebuilds the word pairs of <paramref name="varietyPair"/>. For each meaning of the first
        /// variety, the words with a non-empty shape on both sides are considered. A single word on
        /// each side is paired directly; otherwise the combination with the highest raw alignment
        /// score is chosen (the first one found wins ties). Cognacy and phonetic similarity are then
        /// updated for the chosen pair, and cognate totals are accumulated.
        /// </summary>
        /// <param name="varietyPair">
        /// Variety pair to process. Its word pairs are cleared and repopulated, and its
        /// <c>CognateCount</c> and <c>CognateSoundCorrespondenceFrequencyDistribution</c> are overwritten.
        /// </param>
        public void Process(VarietyPair varietyPair)
        {
            IWordAligner wordAligner = _project.WordAligners[_alignerID];

            varietyPair.WordPairs.Clear();
            var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            int cognateTotal = 0;

            foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
            {
                Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(word => word.Shape.Count > 0).ToArray();
                Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(word => word.Shape.Count > 0).ToArray();

                // Nothing to pair when either variety has no usable word for this meaning.
                if (candidates1.Length == 0 || candidates2.Length == 0)
                {
                    continue;
                }

                WordPair chosenPair;
                IWordAlignerResult chosenResult;

                if (candidates1.Length == 1 && candidates2.Length == 1)
                {
                    // Unambiguous: register the pair first, then align it.
                    chosenPair = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
                    _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
                    chosenResult = wordAligner.Compute(chosenPair);
                }
                else
                {
                    // Ambiguous: align every combination and keep the strictly best raw score.
                    chosenPair = null;
                    chosenResult = null;
                    foreach (Word left in candidates1)
                    {
                        foreach (Word right in candidates2)
                        {
                            IWordAlignerResult candidateResult = wordAligner.Compute(left, right);
                            if (chosenResult == null || candidateResult.BestRawScore > chosenResult.BestRawScore)
                            {
                                chosenPair = new WordPair(left, right);
                                chosenResult = candidateResult;
                            }
                        }
                    }

                    Debug.Assert(chosenPair != null);
                    varietyPair.WordPairs.Add(chosenPair);
                    _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
                }

                // Both branches finish the same way: predict cognacy, tally cognates, store the score.
                _thresholdCognateIdentifier.UpdatePredictedCognacy(chosenPair, chosenResult);
                Alignment<Word, ShapeNode> firstAlignment = chosenResult.GetAlignments().First();
                if (chosenPair.Cognacy)
                {
                    UpdateCognateCorrespondenceCounts(wordAligner, correspondenceCounts, firstAlignment);
                    cognateTotal++;
                }
                chosenPair.PhoneticSimilarityScore = firstAlignment.NormalizedScore;
            }

            varietyPair.CognateCount = cognateTotal;
            varietyPair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;
        }
示例#30
0
        /// <summary>
        /// Rebuilds the aligned word rows for the currently selected meaning.
        /// </summary>
        /// <remarks>
        /// Does nothing when no meaning is selected. Otherwise it collects every word that takes part
        /// in a word pair for the selected meaning across all variety pairs. With no words the row
        /// list is cleared; with one word a trivial single-row alignment is built by hand; with more
        /// words the primary word aligner computes a multiple alignment and its first alignment is
        /// used. Each row also receives a cognate set number: the 1-based position of the set that
        /// contains the word, except that the last set in the ordering (by <c>Noise</c>, then by
        /// descending size) is reported as <see cref="int.MaxValue"/>.
        /// </remarks>
        private void AlignWords()
        {
            if (_selectedMeaning == null)
            {
                return;
            }

            _busyService.ShowBusyIndicatorUntilFinishDrawing();

            // Gather the distinct words that appear in a word pair for the selected meaning.
            var words = new HashSet <Word>();

            foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
            {
                WordPair wp;
                if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
                {
                    words.Add(wp.Word1);
                    words.Add(wp.Word2);
                }
            }
            if (words.Count == 0)
            {
                _words.Clear();
                return;
            }

            IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
            Alignment <Word, ShapeNode> alignment;

            if (words.Count == 1)
            {
                // A single word is not sent through the aligner: build a one-sequence alignment from
                // its prefix, stem and suffix nodes (an absent prefix or suffix becomes an empty cell).
                Word word = words.First();
                Annotation <ShapeNode> prefixAnn = word.Prefix;
                var prefix = new AlignmentCell <ShapeNode>(prefixAnn != null ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter) : Enumerable.Empty <ShapeNode>());
                IEnumerable <AlignmentCell <ShapeNode> > columns = word.Shape.GetNodes(word.Stem.Span).Where(NodeFilter).Select(n => new AlignmentCell <ShapeNode>(n));
                Annotation <ShapeNode> suffixAnn = word.Suffix;
                var suffix = new AlignmentCell <ShapeNode>(suffixAnn != null ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter) : Enumerable.Empty <ShapeNode>());
                alignment = new Alignment <Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
            }
            else
            {
                IWordAlignerResult result = aligner.Compute(words);
                alignment = result.GetAlignments().First();
            }

            List <Cluster <Word> > cognateSets = _projectService.Project.GenerateCognateSets(_selectedMeaning.DomainMeaning).OrderBy(c => c.Noise).ThenByDescending(c => c.DataObjects.Count).ToList();

            ColumnCount = alignment.ColumnCount;
            using (_words.BulkUpdate())
            {
                _words.Clear();
                for (int i = 0; i < alignment.SequenceCount; i++)
                {
                    // The column query below is lazy. A `for` counter is one variable shared by all
                    // iterations, so capturing `i` directly would make a query enumerated after the
                    // counter advances read the wrong row (or index past the last sequence). Capture
                    // a per-iteration copy instead.
                    int row = i;

                    AlignmentCell <ShapeNode> prefix = alignment.Prefixes[row];
                    Word word = alignment.Sequences[row];
                    IEnumerable <AlignmentCell <ShapeNode> > columns = Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[row, col]);
                    AlignmentCell <ShapeNode> suffix = alignment.Suffixes[row];
                    int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
                    _words.Add(new MultipleWordAlignmentWordViewModel(word, prefix, columns, suffix, cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1));
                }
            }
        }
示例#31
0
 /// <summary>
 /// Initializes the base class by storing the supplied word aligner.
 /// </summary>
 /// <param name="wordAligner">Aligner kept in the <c>_wordAligner</c> field; not validated here.</param>
 protected WordAlignerResultBase(IWordAligner wordAligner)
 {
     _wordAligner = wordAligner;
 }