Exemplo n.º 1
0
            /// <summary>
            /// Builds a test project with one meaning and two varieties holding one word each, segments the
            /// varieties, generates the word pairs for the resulting variety pair, and creates the cognate
            /// identifier and word aligner fields.
            /// </summary>
            /// <param name="word1">Text of the word added to the first variety.</param>
            /// <param name="word2">Text of the word added to the second variety.</param>
            /// <param name="ignoreRegularInsertionDeletion">Passed through to the <see cref="BlairCognateIdentifier"/> constructor.</param>
            /// <param name="regularConsEqual">Passed through to the <see cref="BlairCognateIdentifier"/> constructor.</param>
            /// <param name="automaticRegularCorrThreshold">Passed through to the <see cref="BlairCognateIdentifier"/> constructor.</param>
            public TestEnvironment(string word1, string word2, bool ignoreRegularInsertionDeletion = false, bool regularConsEqual = false, bool automaticRegularCorrThreshold = false)
            {
                _segmentPool = new SegmentPool();
                _project = TestHelpers.GetTestProject(_segmentPool);

                // Project content: a single meaning shared by one word in each of the two varieties.
                _project.Meanings.Add(new Meaning("gloss1", "cat1"));
                _project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });

                Variety firstVariety = _project.Varieties[0];
                Variety secondVariety = _project.Varieties[1];
                firstVariety.Words.Add(new Word(word1, _project.Meanings[0]));
                secondVariety.Words.Add(new Word(word2, _project.Meanings[0]));

                var segmenter = new VarietySegmenter(_project.Segmenter);
                foreach (Variety current in _project.Varieties)
                {
                    segmenter.Process(current);
                }

                var pair = new VarietyPair(firstVariety, secondVariety);
                _project.VarietyPairs.Add(pair);

                new SimpleWordPairGenerator(_segmentPool, _project, 0.3, "primary").Process(pair);

                // Start from an empty correspondence frequency distribution after the word pairs exist.
                pair.CognateSoundCorrespondenceFrequencyDistribution = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

                // Both mapping collaborators are unconfigured substitutes.
                _cognateIdentifier = new BlairCognateIdentifier(
                    _segmentPool,
                    ignoreRegularInsertionDeletion,
                    regularConsEqual,
                    automaticRegularCorrThreshold,
                    3,
                    Substitute.For<ISegmentMappings>(),
                    Substitute.For<ISegmentMappings>());

                _aligner = new TestWordAligner(_segmentPool);
            }
Exemplo n.º 2
0
        /// <summary>
        /// Shows the busy indicator and runs the processors returned by <c>GetCompareProcessors</c>
        /// over the given variety pair.
        /// </summary>
        /// <param name="varietyPair">The variety pair to process.</param>
        public void Compare(VarietyPair varietyPair)
        {
            _busyService.ShowBusyIndicatorUntilFinishDrawing();

            // The pipeline takes a sequence, so the single pair is wrapped via ToEnumerable().
            new Pipeline<VarietyPair>(GetCompareProcessors()).Process(varietyPair.ToEnumerable());
        }
Exemplo n.º 3
0
        /// <summary>
        /// Writes a tab-separated matrix of lexical similarity (or distance, when <c>IsDistance</c> is set)
        /// between all project varieties. Rows and columns follow the cluster order computed by
        /// <c>Optics</c>; when <c>IsHalf</c> is set only the lower triangle, including the diagonal, is written.
        /// </summary>
        /// <param name="outputWriter">Writer that receives the matrix text.</param>
        private void WriteOutput(TextWriter outputWriter)
        {
            // Distance between two varieties is 1 - lexical similarity; a variety is at distance 0 from itself.
            var optics = new Optics<Variety>(
                center => center.VarietyPairs
                    .Select(vp => Tuple.Create(vp.GetOtherVariety(center), 1.0 - vp.LexicalSimilarityScore))
                    .Concat(Tuple.Create(center, 0.0)),
                2);

            Variety[] ordered = optics.ClusterOrder(Project.Varieties).Select(entry => entry.DataObject).ToArray();

            // Header row: an empty leading cell followed by every variety name.
            foreach (Variety column in ordered)
            {
                outputWriter.Write("\t");
                outputWriter.Write(column.Name);
            }
            outputWriter.WriteLine();

            for (int row = 0; row < ordered.Length; row++)
            {
                Variety rowVariety = ordered[row];
                outputWriter.Write(rowVariety.Name);

                int columnCount = ordered.Length;
                if (IsHalf)
                {
                    columnCount = row + 1;
                }

                for (int col = 0; col < columnCount; col++)
                {
                    outputWriter.Write("\t");
                    if (row != col)
                    {
                        double similarity = rowVariety.VarietyPairs[ordered[col]].LexicalSimilarityScore;
                        outputWriter.Write("{0:0.00}", IsDistance ? 1.0 - similarity : similarity);
                    }
                    else
                    {
                        // NOTE(review): the diagonal uses fixed literals while the other cells are formatted
                        // through the writer's format provider - confirm the two agree for the cultures in use.
                        outputWriter.Write(IsDistance ? "0.00" : "1.00");
                    }
                }
                outputWriter.WriteLine();
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a surrogate that captures the data of <paramref name="vp"/>: the variety names, the word
        /// pairs, the similarity scores, the default sound correspondence probability, the cognate sound
        /// correspondence frequency counts and the sound correspondences grouped by syllable position.
        /// </summary>
        /// <param name="vp">The variety pair to copy from.</param>
        public VarietyPairSurrogate(VarietyPair vp)
        {
            Variety1 = vp.Variety1.Name;
            Variety2 = vp.Variety2.Name;

            // Shared lookup so that a given word pair always maps to the same surrogate instance,
            // both in the word pair list and in the sound correspondences below.
            var surrogatesByWordPair = new Dictionary<WordPair, WordPairSurrogate>();
            _wordPairs = vp.WordPairs
                .Select(wp => surrogatesByWordPair.GetValue(wp, () => new WordPairSurrogate(wp)))
                .ToList();

            PhoneticSimilarityScore = vp.PhoneticSimilarityScore;
            LexicalSimilarityScore = vp.LexicalSimilarityScore;
            DefaultSoundCorrespondenceProbability = vp.DefaultSoundCorrespondenceProbability;

            // Each sound context maps to an array of (segment string representations, observed count).
            _cognateSoundCorrespondenceFrequencyDistribution = new Dictionary<SoundContextSurrogate, Tuple<string[], int>[]>();
            foreach (SoundContext context in vp.CognateSoundCorrespondenceFrequencyDistribution.Conditions)
            {
                FrequencyDistribution<Ngram<Segment>> counts = vp.CognateSoundCorrespondenceFrequencyDistribution[context];
                Tuple<string[], int>[] samples = counts.ObservedSamples
                    .Select(ngram => Tuple.Create(ngram.Select(seg => seg.StrRep).ToArray(), counts[ngram]))
                    .ToArray();
                _cognateSoundCorrespondenceFrequencyDistribution[new SoundContextSurrogate(context)] = samples;
            }

            _cognateSoundCorrespondenceByPosition = new Dictionary<string, List<SoundCorrespondenceSurrogate>>();
            foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in vp.CognateSoundCorrespondencesByPosition)
            {
                // Any position that is neither onset nor nucleus is stored under "coda".
                string positionName = entry.Key == CogFeatureSystem.Onset
                    ? "onset"
                    : entry.Key == CogFeatureSystem.Nucleus
                        ? "nucleus"
                        : "coda";

                _cognateSoundCorrespondenceByPosition[positionName] = entry.Value
                    .Select(corr => new SoundCorrespondenceSurrogate(surrogatesByWordPair, corr))
                    .ToList();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Computes the highest sound change score attainable for <paramref name="node"/> of
        /// <paramref name="word"/> against the variety of <paramref name="otherWord"/>, as
        /// <c>MaxSoundChangeScore</c> multiplied by the largest applicable probability and truncated to an integer.
        /// </summary>
        /// <param name="word">The word that contains <paramref name="node"/>.</param>
        /// <param name="node">The node whose score is computed.</param>
        /// <param name="otherWord">The word from the variety being compared against.</param>
        /// <returns>
        /// 0 when both words belong to the same variety or the variety pair has no sound change
        /// probability distribution; otherwise the scaled maximum probability.
        /// </returns>
        private int GetMaxSoundChangeScore(Word word, ShapeNode node, Word otherWord)
        {
            if (word.Variety == otherWord.Variety)
            {
                return 0;
            }

            VarietyPair pair = word.Variety.VarietyPairs[otherWord.Variety];
            var distribution = pair.SoundChangeProbabilityDistribution;
            if (distribution == null)
            {
                return 0;
            }

            double best;
            if (pair.Variety1 == word.Variety)
            {
                // The node is on the conditioning side: take the largest probability within its own
                // context, falling back to the pair's default when the context is unknown or empty.
                SoundContext context = node.ToSoundContext(_segmentPool, _contextualSoundClasses);
                best = pair.DefaultCorrespondenceProbability;

                IProbabilityDistribution<Ngram<Segment>> contextDist;
                bool found = distribution.TryGetProbabilityDistribution(context, out contextDist);
                if (found && contextDist.Samples.Count > 0)
                {
                    best = contextDist.Samples.Max(sample => contextDist[sample]);
                }
            }
            else
            {
                // The node is on the outcome side: take its largest probability across all contexts.
                Ngram<Segment> outcome = _segmentPool.GetExisting(node);
                best = 0;
                if (distribution.Conditions.Count != 0)
                {
                    best = distribution.Conditions.Max(context => distribution[context][outcome]);
                }
            }

            return (int)(MaxSoundChangeScore * best);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Decides whether the correspondence <paramref name="v"/> at the given alignment column counts as
        /// regular, i.e. whether its observed frequency in the column's sound context reaches the threshold.
        /// </summary>
        /// <param name="wordPair">Word pair whose variety pair supplies the frequency data.</param>
        /// <param name="alignerResult">Aligner result; its aligner provides the contextual sound classes.</param>
        /// <param name="alignment">The alignment from which the sound context is derived.</param>
        /// <param name="column">Alignment column under consideration.</param>
        /// <param name="v">The corresponding segment n-gram being tested.</param>
        /// <returns><c>true</c> when the frequency of <paramref name="v"/> is at least the threshold.</returns>
        private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column,
                               Ngram<Segment> v)
        {
            VarietyPair pair = wordPair.VarietyPair;
            SoundContext target = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
            FrequencyDistribution<Ngram<Segment>> counts = pair.CognateSoundCorrespondenceFrequencyDistribution[target];

            // Use the default threshold unless the automatic table yields a value.
            int threshold = DefaultRegularCorrespondenceThreshold;
            if (AutomaticRegularCorrespondenceThreshold)
            {
                // Total occurrences of v over every context that shares the target's left and right environments.
                int seg2Count = pair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
                    .Where(sc => sc.LeftEnvironment == target.LeftEnvironment && sc.RightEnvironment == target.RightEnvironment)
                    .Sum(sc => pair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

                int tableThreshold;
                if (_regularCorrespondenceThresholdTable.TryGetThreshold(pair.CognateCount, counts.SampleOutcomeCount, seg2Count,
                                                                         out tableThreshold))
                {
                    threshold = tableThreshold;
                }
            }

            return counts[v] >= threshold;
        }
Exemplo n.º 7
0
 /// <summary>
 /// Creates the view model for one cell of the similarity matrix.
 /// </summary>
 /// <param name="similarityMetric">The similarity metric stored for this cell.</param>
 /// <param name="thisVariety">The variety this cell is viewed from.</param>
 /// <param name="varietyPair">The variety pair backing this cell; its other member is resolved relative to <paramref name="thisVariety"/>.</param>
 public SimilarityMatrixVarietyPairViewModel(SimilarityMetric similarityMetric, Variety thisVariety, VarietyPair varietyPair)
 {
     _similarityMetric = similarityMetric;
     _thisVariety      = thisVariety;
     _varietyPair      = varietyPair;

     // The opposite member of the pair, as seen from thisVariety.
     _otherVariety = varietyPair.GetOtherVariety(thisVariety);

     _switchToVarietyPairCommand = new RelayCommand(SwitchToVarietyPair);
 }
Exemplo n.º 8
0
        /// <summary>
        /// Shows the busy indicator, then runs the processors returned by <c>GetCompareProcessors</c> over
        /// the given variety pair, sending a <c>PerformingComparisonMessage</c> before and a
        /// <c>ComparisonPerformedMessage</c> after the pipeline runs.
        /// </summary>
        /// <param name="varietyPair">The variety pair to process.</param>
        public void Compare(VarietyPair varietyPair)
        {
            _busyService.ShowBusyIndicatorUntilFinishDrawing();

            Messenger.Default.Send(new PerformingComparisonMessage(varietyPair));

            // The pipeline takes a sequence, so the single pair is wrapped via ToEnumerable().
            new Pipeline<VarietyPair>(GetCompareProcessors()).Process(varietyPair.ToEnumerable());

            Messenger.Default.Send(new ComparisonPerformedMessage(varietyPair));
        }
Exemplo n.º 9
0
        /// <summary>
        /// Rebuilds a <see cref="VarietyPair"/> from this surrogate: the scores, the word pairs, the sound
        /// change frequency and probability distributions, and the sound correspondences per syllable position.
        /// </summary>
        /// <param name="segmentPool">Pool used to resolve stored segment strings to existing segments.</param>
        /// <param name="project">Project that supplies the varieties and the primary word aligner.</param>
        /// <returns>The reconstructed variety pair.</returns>
        /// <exception cref="InvalidOperationException">
        /// A non-null sound correspondence list is stored under a position key other than
        /// "onset", "nucleus" or "coda".
        /// </exception>
        public VarietyPair ToVarietyPair(SegmentPool segmentPool, CogProject project)
        {
            var vp = new VarietyPair(project.Varieties[Variety1], project.Varieties[Variety2])
            {
                PhoneticSimilarityScore          = PhoneticSimilarityScore,
                LexicalSimilarityScore           = LexicalSimilarityScore,
                DefaultCorrespondenceProbability = DefaultCorrespondenceProbability
            };

            // Shared lookup so that a given surrogate always resolves to the same WordPair instance,
            // both in the word pair list and in the sound correspondences below.
            var wordPairs = new Dictionary<WordPairSurrogate, WordPair>();

            vp.WordPairs.AddRange(_wordPairs.Select(surrogate => wordPairs.GetValue(surrogate, () => surrogate.ToWordPair(project, vp))));

            var soundChanges = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

            foreach (KeyValuePair<SoundContextSurrogate, Tuple<string[], int>[]> fd in _soundChanges)
            {
                SoundContext ctxt = fd.Key.ToSoundContext(project, segmentPool);
                FrequencyDistribution<Ngram<Segment>> freqDist = soundChanges[ctxt];
                foreach (Tuple<string[], int> sample in fd.Value)
                {
                    // A null segment array is restored as an empty n-gram.
                    Ngram<Segment> corr = sample.Item1 == null ? new Ngram<Segment>() : new Ngram<Segment>(sample.Item1.Select(segmentPool.GetExisting));
                    freqDist.Increment(corr, sample.Item2);
                }
            }
            vp.SoundChangeFrequencyDistribution = soundChanges;

            // Number of possible correspondences, derived from the segment count of the second variety;
            // it grows quadratically when the primary aligner has expansion/compression enabled.
            IWordAligner aligner       = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
            int          segmentCount  = vp.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;
            int          possCorrCount = aligner.ExpansionCompressionEnabled ? (segmentCount * segmentCount) + segmentCount + 1 : segmentCount + 1;

            vp.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(soundChanges,
                (sc, freqDist) => new WittenBellProbabilityDistribution<Ngram<Segment>>(freqDist, possCorrCount));

            foreach (KeyValuePair<string, List<SoundCorrespondenceSurrogate>> kvp in _soundCorrespondenceCollections)
            {
                if (kvp.Value != null)
                {
                    FeatureSymbol pos;
                    switch (kvp.Key)
                    {
                    case "onset":
                        pos = CogFeatureSystem.Onset;
                        break;

                    case "nucleus":
                        pos = CogFeatureSystem.Nucleus;
                        break;

                    case "coda":
                        pos = CogFeatureSystem.Coda;
                        break;

                    default:
                        // Previously an unknown key left pos null and the null was used as the collection
                        // index below; fail with a message that names the offending key instead.
                        throw new InvalidOperationException(string.Format("Unrecognized sound correspondence position \"{0}\".", kvp.Key));
                    }
                    vp.SoundCorrespondenceCollections[pos].AddRange(kvp.Value.Select(surrogate => surrogate.ToSoundCorrespondence(segmentPool, wordPairs)));
                }
            }
            return vp;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Runs <c>CognicityWordPairGenerator</c> on a two-variety project in which some meanings have
        /// several candidate words, using a substitute cognate identifier that marks four specific word
        /// combinations as cognate, and checks the first three generated word pairs.
        /// </summary>
        public void Process()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.loʊ", project.Meanings[0]),
                new Word("gan", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2]),
                new Word("ban", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("tan", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });
            project.WordAligners["primary"] = new TestWordAligner(segmentPool);

            // The substitute identifier predicts cognacy (score 1.0) for exactly these combinations.
            var cognateIdentifier = Substitute.For<ICognateIdentifier>();
            cognateIdentifier.When(ci => ci.UpdateCognicity(Arg.Any<WordPair>(), Arg.Any<IWordAlignerResult>())).Do(call =>
            {
                var candidate = call.Arg<WordPair>();
                string first  = candidate.Word1.StrRep;
                string second = candidate.Word2.StrRep;

                bool markAsCognate = (first == "hɛ.loʊ" && second == "hɛ.ɬa")
                                     || (first == "gʊd" && second == "tan")
                                     || (first == "bæ" && second == "pæ")
                                     || (first == "ban" && second == "pæ");
                if (markAsCognate)
                {
                    candidate.AreCognatePredicted = true;
                    candidate.CognicityScore      = 1.0;
                }
            });
            project.CognateIdentifiers["primary"] = cognateIdentifier;

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            new CognicityWordPairGenerator(segmentPool, project, 0.3, "primary", "primary").Process(vp);

            // Expected (Word1, Word2) for the first three word pairs, in order.
            string[][] expected =
            {
                new[] { "hɛ.loʊ", "hɛ.ɬa" },
                new[] { "gʊd", "tan" },
                new[] { "bæ", "pæ" }
            };
            for (int i = 0; i < expected.Length; i++)
            {
                WordPair wp = vp.WordPairs[i];
                Assert.That(wp.Word1.StrRep, Is.EqualTo(expected[i][0]));
                Assert.That(wp.Word2.StrRep, Is.EqualTo(expected[i][1]));
            }
        }
Exemplo n.º 11
0
 /// <summary>
 /// Starting with no cognacy decisions, executing the pin/unpin command must record a single
 /// decision for the word pair's varieties and meaning, with the cognacy flag set to true.
 /// </summary>
 public void PinUnpinCommand_UnpinnedNoncognate_PinnedCognate()
 {
     using (var env = new TestEnvironment(null, false))
     {
         var viewModel = env.WordPairViewModel;

         // Precondition: nothing is pinned and the command offers to pin.
         Assert.That(env.Project.CognacyDecisions, Is.Empty);
         Assert.That(viewModel.PinUnpinText, Is.EqualTo("Pin to cognates"));

         viewModel.PinUnpinCommand.Execute(null);

         var wordPair = viewModel.DomainWordPair;
         VarietyPair pair = wordPair.VarietyPair;
         var expectedDecision = new CognacyDecision(pair.Variety1, pair.Variety2, wordPair.Meaning, true);
         Assert.That(env.Project.CognacyDecisions, Is.EquivalentTo(new[] { expectedDecision }));
     }
 }
Exemplo n.º 12
0
        /// <summary>
        /// Asks the user for a target file through the save-file dialog and, if a valid choice was made,
        /// exports the variety pair with the exporter registered for the selected file type.
        /// </summary>
        /// <param name="ownerViewModel">View model that owns the dialog.</param>
        /// <param name="varietyPair">The variety pair to export.</param>
        /// <returns><c>false</c> when the dialog result is not valid; otherwise the result of <c>Export</c>.</returns>
        public bool ExportVarietyPair(object ownerViewModel, VarietyPair varietyPair)
        {
            FileDialogResult choice = _dialogService.ShowSaveFileDialog(ownerViewModel, "Export Variety Pair", VarietyPairExporters.Keys);
            if (!choice.IsValid)
            {
                return false;
            }

            // The exporter and the primary word aligner are looked up when the export callback runs.
            return Export(
                ownerViewModel,
                choice.FileName,
                stream => VarietyPairExporters[choice.SelectedFileType].Export(
                    stream,
                    _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner],
                    varietyPair));
        }
Exemplo n.º 13
0
        /// <summary>
        /// With frequency counts of 3 recorded for the l/ɬ and b/p correspondences and b/p additionally
        /// mapped as similar segments, all three word pairs must be predicted as cognates.
        /// </summary>
        public void UpdateCognicity_RegularCorrespondences()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.lo", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary").Process(vp);

            // Record three observations each for l -> ɬ and b -> p.
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("l"))]
                .Increment(segmentPool.GetExisting("ɬ"), 3);
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("b"))]
                .Increment(segmentPool.GetExisting("p"), 3);

            var aligner = new TestWordAligner(segmentPool);

            // Nothing is ignored; only b/p is reported as a similar-segment mapping.
            var ignoredMappings = Substitute.For<ISegmentMappings>();
            ignoredMappings.IsMapped(Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>()).Returns(false);

            var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
            similarSegmentsMappings.IsMapped(Arg.Any<ShapeNode>(), segmentPool.GetExisting("b"), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), segmentPool.GetExisting("p"), Arg.Any<ShapeNode>()).Returns(true);

            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);

            // Every one of the first three word pairs is expected to be predicted cognate.
            for (int i = 0; i < 3; i++)
            {
                var wp = vp.WordPairs[i];
                cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
                Assert.That(wp.AreCognatePredicted, Is.True);
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Creates the view model for a variety pair, registers for <c>ComparisonPerformedMessage</c>
        /// notifications and performs the initial update from the pair.
        /// </summary>
        /// <param name="segmentPool">Segment pool stored for later use.</param>
        /// <param name="projectService">Project service stored for later use.</param>
        /// <param name="wordPairsFactory">Factory for word-pairs view models.</param>
        /// <param name="wordPairFactory">Factory for single word-pair view models.</param>
        /// <param name="varietyPair">The variety pair this view model wraps; also passed to the base constructor.</param>
        /// <param name="areVarietiesInOrder">Flag stored as given - presumably whether the varieties are shown in the pair's own order; TODO confirm.</param>
        public VarietyPairViewModel(SegmentPool segmentPool, IProjectService projectService, WordPairsViewModel.Factory wordPairsFactory,
                                    WordPairViewModel.Factory wordPairFactory, VarietyPair varietyPair, bool areVarietiesInOrder)
            : base(varietyPair)
        {
            // Domain state.
            _varietyPair         = varietyPair;
            _areVarietiesInOrder = areVarietiesInOrder;

            // Services and factories.
            _segmentPool      = segmentPool;
            _projectService   = projectService;
            _wordPairsFactory = wordPairsFactory;
            _wordPairFactory  = wordPairFactory;

            Messenger.Default.Register<ComparisonPerformedMessage>(this, HandleComparisonPerformed);

            // Runs only after every field above has been assigned.
            UpdateVarietyPair();
        }
Exemplo n.º 15
0
        /// <summary>
        /// Rebuilds a <see cref="WordPair"/> from this surrogate by locating, in each variety of
        /// <paramref name="vp"/>, the first word for the stored meaning whose string representation matches.
        /// </summary>
        /// <param name="project">Project used to resolve the stored meaning.</param>
        /// <param name="vp">Variety pair whose varieties supply the words.</param>
        /// <returns>The word pair with its cognate prediction, scores and alignment notes restored.</returns>
        public WordPair ToWordPair(CogProject project, VarietyPair vp)
        {
            Meaning meaning = project.Meanings[Meaning];

            // First() throws when no word with the stored representation exists for the meaning.
            var restored = new WordPair(
                vp.Variety1.Words[meaning].First(w => w.StrRep == Word1),
                vp.Variety2.Words[meaning].First(w => w.StrRep == Word2));

            restored.AreCognatePredicted     = AreCognatePredicted;
            restored.PhoneticSimilarityScore = PhoneticSimilarityScore;
            restored.CognicityScore          = CognicityScore;
            restored.AlignmentNotes.AddRange(_alignmentNotes);

            return restored;
        }
Exemplo n.º 16
0
        /// <summary>
        /// Runs <c>CognacyWordPairGenerator</c> on a two-variety project in which some meanings have
        /// several candidate words, using a substitute cognate identifier that marks four specific word
        /// combinations as cognate, and checks the first three generated word pairs.
        /// </summary>
        public void Process()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.loʊ", project.Meanings[0]),
                new Word("gan", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2]),
                new Word("ban", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("tan", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });
            project.WordAligners["primary"] = new TestWordAligner(segmentPool);

            // The substitute identifier predicts cognacy (score 1.0) for exactly these combinations.
            var cognateIdentifier = Substitute.For<ICognateIdentifier>();
            cognateIdentifier.When(ci => ci.UpdateCognacy(Arg.Any<WordPair>(), Arg.Any<IWordAlignerResult>())).Do(call =>
                {
                    var candidate = call.Arg<WordPair>();
                    string first = candidate.Word1.StrRep;
                    string second = candidate.Word2.StrRep;

                    bool markAsCognate = (first == "hɛ.loʊ" && second == "hɛ.ɬa")
                        || (first == "gʊd" && second == "tan")
                        || (first == "bæ" && second == "pæ")
                        || (first == "ban" && second == "pæ");
                    if (markAsCognate)
                    {
                        candidate.AreCognatePredicted = true;
                        candidate.CognacyScore = 1.0;
                    }
                });
            project.CognateIdentifiers["primary"] = cognateIdentifier;

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            new CognacyWordPairGenerator(segmentPool, project, 0.3, "primary", "primary").Process(vp);

            // Expected (Word1, Word2) for the first three word pairs, in order.
            string[][] expected =
            {
                new[] {"hɛ.loʊ", "hɛ.ɬa"},
                new[] {"gʊd", "tan"},
                new[] {"bæ", "pæ"}
            };
            for (int i = 0; i < expected.Length; i++)
            {
                WordPair wp = vp.WordPairs[i];
                Assert.That(wp.Word1.StrRep, Is.EqualTo(expected[i][0]));
                Assert.That(wp.Word2.StrRep, Is.EqualTo(expected[i][1]));
            }
        }
Exemplo n.º 17
0
        /// <summary>
        /// Creates a variety pair for the two selected varieties, adds it to the project, runs the
        /// comparison and selects the resulting pair. Does nothing when no varieties are selected or a
        /// variety pair is already selected.
        /// </summary>
        private void PerformComparison()
        {
            if (_varietyPairState == VarietyPairState.NotSelected)
            {
                return;
            }
            if (_selectedVarietyPair != null)
            {
                return;
            }

            _busyService.ShowBusyIndicatorUntilFinishDrawing();

            CogProject currentProject = _projectService.Project;
            var newPair = new VarietyPair(_selectedVariety1.DomainVariety, _selectedVariety2.DomainVariety);
            currentProject.VarietyPairs.Add(newPair);

            _analysisService.Compare(newPair);

            // Publish the new selection through the properties rather than the backing fields.
            SelectedVarietyPair = _varietyPairFactory(newPair, true);
            VarietyPairState = VarietyPairState.SelectedAndCompared;
        }
Exemplo n.º 18
0
        /// <summary>
        /// Writes a tab-separated similarity matrix for all project varieties to <paramref name="stream"/>.
        /// Rows and columns follow the cluster order computed by <c>Optics</c>; diagonal cells are left empty.
        /// </summary>
        /// <param name="stream">Destination stream; it is wrapped so that disposing the writer does not close it.</param>
        /// <param name="project">Project whose varieties are exported.</param>
        /// <param name="similarityMetric">Selects the lexical or the phonetic similarity score.</param>
        public void Export(Stream stream, CogProject project, SimilarityMetric similarityMetric)
        {
            // Clustering distance is 1 - similarity; for a metric other than Lexical or Phonetic the
            // similarity stays 0 here.
            var optics = new Optics<Variety>(
                center => center.VarietyPairs
                    .Select(vp =>
                    {
                        double similarity = 0;
                        if (similarityMetric == SimilarityMetric.Lexical)
                        {
                            similarity = vp.LexicalSimilarityScore;
                        }
                        else if (similarityMetric == SimilarityMetric.Phonetic)
                        {
                            similarity = vp.PhoneticSimilarityScore;
                        }
                        return Tuple.Create(vp.GetOtherVariety(center), 1.0 - similarity);
                    })
                    .Concat(Tuple.Create(center, 0.0)),
                2);

            Variety[] ordered = optics.ClusterOrder(project.Varieties).Select(entry => entry.DataObject).ToArray();

            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                // Header row: an empty leading cell followed by every variety name.
                foreach (Variety column in ordered)
                {
                    writer.Write("\t");
                    writer.Write(column.Name);
                }
                writer.WriteLine();

                for (int row = 0; row < ordered.Length; row++)
                {
                    Variety rowVariety = ordered[row];
                    writer.Write(rowVariety.Name);

                    for (int col = 0; col < ordered.Length; col++)
                    {
                        writer.Write("\t");
                        if (row == col)
                        {
                            // Diagonal cell: only the separator is written.
                            continue;
                        }

                        VarietyPair cell = rowVariety.VarietyPairs[ordered[col]];
                        double value = similarityMetric == SimilarityMetric.Lexical
                            ? cell.LexicalSimilarityScore
                            : cell.PhoneticSimilarityScore;
                        writer.Write("{0:0.00}", value);
                    }
                    writer.WriteLine();
                }
            }
        }
Exemplo n.º 19
0
        /// <summary>
        /// With unconfigured ignored and similar-segment mappings, only the second word pair
        /// (gʊd / gud) is expected to be predicted as cognate.
        /// </summary>
        public void UpdateCognicity_NoSimilarSegments()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.loʊ", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary").Process(vp);

            var aligner = new TestWordAligner(segmentPool);
            var ignoredMappings = Substitute.For<ISegmentMappings>();
            var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);

            // Expected prediction for the first three word pairs, in order.
            bool[] expectedCognate = { false, true, false };
            for (int i = 0; i < expectedCognate.Length; i++)
            {
                var wp = vp.WordPairs[i];
                cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
                if (expectedCognate[i])
                {
                    Assert.That(wp.AreCognatePredicted, Is.True);
                }
                else
                {
                    Assert.That(wp.AreCognatePredicted, Is.False);
                }
            }
        }
Exemplo n.º 20
0
        /// <summary>
        /// With frequency counts of 3 recorded for the l/ɬ and b/p correspondences and b/p additionally
        /// mapped as similar segments, all three word pairs must be predicted as cognates.
        /// </summary>
        public void UpdateCognacy_RegularCorrespondences()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.lo", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary").Process(vp);

            // Record three observations each for l -> ɬ and b -> p.
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("l"))]
                .Increment(segmentPool.GetExisting("ɬ"), 3);
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("b"))]
                .Increment(segmentPool.GetExisting("p"), 3);

            var aligner = new TestWordAligner(segmentPool);

            // Nothing is ignored; only b/p is reported as a similar-segment mapping.
            var ignoredMappings = Substitute.For<ISegmentMappings>();
            ignoredMappings.IsMapped(Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>()).Returns(false);

            var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
            similarSegmentsMappings.IsMapped(Arg.Any<ShapeNode>(), segmentPool.GetExisting("b"), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), segmentPool.GetExisting("p"), Arg.Any<ShapeNode>()).Returns(true);

            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);

            // Every one of the first three word pairs is expected to be predicted cognate.
            for (int i = 0; i < 3; i++)
            {
                var wp = vp.WordPairs[i];
                cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
                Assert.That(wp.AreCognatePredicted, Is.True);
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Writes a plain-text report for a variety pair: similarity scores, the word pairs split into
        /// likely cognates and likely non-cognates, and the cognate sound correspondences per sound context.
        /// </summary>
        /// <param name="stream">Destination stream; it is wrapped so that disposing the writer does not close it.</param>
        /// <param name="aligner">Word aligner handed to <c>WriteWordPairs</c>.</param>
        /// <param name="varietyPair">The variety pair to report on.</param>
        public void Export(Stream stream, IWordAligner aligner, VarietyPair varietyPair)
        {
            using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
            {
                // Section 1: similarity scores, formatted as percentages.
                writer.WriteLine("Similarity");
                writer.WriteLine("----------");
                writer.WriteLine("Lexical: {0:p}", varietyPair.LexicalSimilarityScore);
                writer.WriteLine("Phonetic: {0:p}", varietyPair.PhoneticSimilarityScore);
                writer.WriteLine();

                // Sections 2 and 3: word pairs, partitioned by their Cognacy flag.
                writer.WriteLine("Likely cognates");
                writer.WriteLine("--------------");
                WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(pair => pair.Cognacy));
                writer.WriteLine();

                writer.WriteLine("Likely non-cognates");
                writer.WriteLine("-------------------");
                WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(pair => !pair.Cognacy));
                writer.WriteLine();

                // Section 4: one group per sound context, groups separated by a blank line.
                writer.WriteLine("Sound correspondences");
                writer.WriteLine("---------------------");

                bool needsSeparator = false;
                foreach (SoundContext context in varietyPair.CognateSoundCorrespondenceProbabilityDistribution.Conditions)
                {
                    if (needsSeparator)
                    {
                        writer.WriteLine();
                    }
                    needsSeparator = true;

                    IProbabilityDistribution<Ngram<Segment>> probabilities = varietyPair.CognateSoundCorrespondenceProbabilityDistribution[context];
                    FrequencyDistribution<Ngram<Segment>> counts = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[context];

                    writer.WriteLine(context.ToString());

                    // Observed correspondences, most probable first.
                    var rows = counts.ObservedSamples
                        .Select(sample => new { Outcome = sample, Chance = probabilities[sample], Count = counts[sample] })
                        .OrderByDescending(row => row.Chance);
                    foreach (var row in rows)
                    {
                        writer.WriteLine("{0}: {1:p}, {2}", row.Outcome, row.Chance, row.Count);
                    }
                }
            }
        }
Exemplo n.º 22
0
            /// <summary>
            /// Builds a test project with two varieties and one meaning, pairs the words
            /// "wɜrd" and "kɑr", and creates the <c>WordPairViewModel</c> under test.
            /// </summary>
            /// <param name="actualCognacy">Stored on the word pair; when it has a value it is also
            /// recorded in the project as a cognacy decision.</param>
            /// <param name="predictedCognacy">Stored on the word pair as its predicted cognacy.</param>
            public TestEnvironment(bool? actualCognacy, bool predictedCognacy)
            {
                DispatcherHelper.Initialize();

                var pool = new SegmentPool();
                var shapeSpans = new ShapeSpanFactory();

                _project = TestHelpers.GetTestProject(shapeSpans, pool);
                _project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
                _project.Meanings.Add(new Meaning("meaning1", null));

                // Each word is added to its variety first and segmented afterwards.
                var firstWord = new Word("wɜrd", _project.Meanings[0]);
                _project.Varieties[0].Words.Add(firstWord);
                _project.Segmenter.Segment(firstWord);

                var secondWord = new Word("kɑr", _project.Meanings[0]);
                _project.Varieties[1].Words.Add(secondWord);
                _project.Segmenter.Segment(secondWord);

                var varietyPair = new VarietyPair(_project.Varieties[0], _project.Varieties[1]);

                // The decision is registered before the variety pair is added to the project.
                if (actualCognacy.HasValue)
                {
                    var decision = new CognacyDecision(varietyPair.Variety1, varietyPair.Variety2, _project.Meanings[0], actualCognacy.Value);
                    _project.CognacyDecisions.Add(decision);
                }

                _project.VarietyPairs.Add(varietyPair);

                var wordPair = new WordPair(firstWord, secondWord);
                wordPair.PredictedCognacy = predictedCognacy;
                wordPair.ActualCognacy = actualCognacy;
                _project.VarietyPairs[0].WordPairs.Add(wordPair);

                var projectService = Substitute.For<IProjectService>();
                var analysisService = Substitute.For<IAnalysisService>();
                projectService.Project.Returns(_project);

                _wordPairViewModel = new WordPairViewModel(projectService, analysisService, wordPair, true);
            }
Exemplo n.º 23
0
        /// <summary>
        /// Runs the comparison for the current selection. When no variety pair is selected, a new
        /// pair is created from the two selected varieties, added to the project, compared, and then
        /// selected. Otherwise the already selected pair is compared.
        /// </summary>
        private void PerformComparison()
        {
            CogProject currentProject = _projectService.Project;

            VarietyPair target;
            if (_selectedVarietyPair != null)
            {
                target = _selectedVarietyPair.DomainVarietyPair;
            }
            else
            {
                target = new VarietyPair(_selectedVariety1.DomainVariety, _selectedVariety2.DomainVariety);
                currentProject.VarietyPairs.Add(target);
            }

            _analysisService.Compare(target);

            // The field is read again after the comparison rather than cached beforehand.
            if (_selectedVarietyPair == null)
            {
                SelectedVarietyPair = _varietyPairFactory(target, true);
            }

            VarietyPairState = VarietyPairState.SelectedAndCompared;
        }
Exemplo n.º 24
0
        /// <summary>
        /// Creates the view model for a variety pair: splits its word pairs into predicted cognates
        /// and non-cognates, and builds one <c>SoundChangeViewModel</c> per context/sample combination
        /// of the pair's sound change probability distribution.
        /// </summary>
        /// <param name="segmentPool">Segment pool stored for later use.</param>
        /// <param name="projectService">Provides the project from which the primary word aligner is taken.</param>
        /// <param name="wordPairsFactory">Factory invoked once for the cognate list and once for the non-cognate list.</param>
        /// <param name="varietyPair">The domain variety pair being wrapped.</param>
        /// <param name="areVarietiesInOrder">Stored and passed on to every <c>WordPairViewModel</c>.</param>
        public VarietyPairViewModel(SegmentPool segmentPool, IProjectService projectService, WordPairsViewModel.Factory wordPairsFactory, VarietyPair varietyPair, bool areVarietiesInOrder)
        {
            _segmentPool = segmentPool;
            _projectService = projectService;
            _varietyPair = varietyPair;
            _areVarietiesInOrder = areVarietiesInOrder;

            IWordAligner primaryAligner = projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

            // Word pairs predicted to be cognates.
            _cognates = wordPairsFactory();
            foreach (WordPair cognatePair in _varietyPair.WordPairs.Where(candidate => candidate.AreCognatePredicted))
            {
                var cognateViewModel = new WordPairViewModel(primaryAligner, cognatePair, _areVarietiesInOrder);
                _cognates.WordPairs.Add(cognateViewModel);
            }

            // Every other word pair.
            _noncognates = wordPairsFactory();
            foreach (WordPair otherPair in _varietyPair.WordPairs.Where(candidate => !candidate.AreCognatePredicted))
            {
                var otherViewModel = new WordPairViewModel(primaryAligner, otherPair, _areVarietiesInOrder);
                _noncognates.WordPairs.Add(otherViewModel);
            }

            // One entry per (context, sample), carrying both the probability and the frequency.
            var soundChanges =
                from lhs in _varietyPair.SoundChangeProbabilityDistribution.Conditions
                from segment in _varietyPair.SoundChangeProbabilityDistribution[lhs].Samples
                select new SoundChangeViewModel(lhs, segment,
                    _varietyPair.SoundChangeProbabilityDistribution[lhs][segment],
                    _varietyPair.SoundChangeFrequencyDistribution[lhs][segment]);
            _soundChanges = new ReadOnlyList<SoundChangeViewModel>(soundChanges.ToList());
        }
Exemplo n.º 25
0
        /// <summary>
        /// Determines whether the current selection can be shown in the variety pairs view.
        /// </summary>
        /// <returns>
        /// <c>true</c> only when exactly two words are selected, they belong to different varieties,
        /// and the word pair stored for the selected meaning in their variety pair consists of
        /// exactly those two words.
        /// </returns>
        private bool CanShowInVarietyPairs()
        {
            if (_selectedWords.Count != 2)
            {
                return false;
            }

            Word first = _selectedWords[0].DomainWord;
            Word second = _selectedWords[1].DomainWord;
            if (first.Variety == second.Variety)
            {
                return false;
            }

            VarietyPair varietyPair = first.Variety.VarietyPairs[second.Variety];

            // The pair registered for the meaning must hold the very words that are selected.
            WordPair candidate;
            return varietyPair.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out candidate)
                && candidate.GetWord(first.Variety) == first
                && candidate.GetWord(second.Variety) == second;
        }
Exemplo n.º 26
0
        /// <summary>
        /// With unconfigured substitutes for both the ignored and the similar-segment mappings, only
        /// the second word pair ("gʊd"/"gud") is expected to be predicted as cognate.
        /// </summary>
        public void UpdateCognacy_NoSimilarSegments()
        {
            var pool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, pool);

            // Three meanings, two varieties, one word per meaning in each variety.
            project.Meanings.AddRange(new[]
            {
                new Meaning("gloss1", "cat1"),
                new Meaning("gloss2", "cat2"),
                new Meaning("gloss3", "cat3")
            });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[]
            {
                new Word("hɛ.loʊ", project.Meanings[0]),
                new Word("gʊd", project.Meanings[1]),
                new Word("bæ", project.Meanings[2])
            });
            project.Varieties[1].Words.AddRange(new[]
            {
                new Word("hɛ.ɬa", project.Meanings[0]),
                new Word("gud", project.Meanings[1]),
                new Word("pæ", project.Meanings[2])
            });

            var segmenter = new VarietySegmenter(project.Segmenter);
            foreach (Variety current in project.Varieties)
            {
                segmenter.Process(current);
            }

            var varietyPair = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(varietyPair);

            var pairGenerator = new SimpleWordPairGenerator(pool, project, 0.3, "primary");
            pairGenerator.Process(varietyPair);

            var wordAligner = new TestWordAligner(pool);
            var noIgnoredMappings = Substitute.For<ISegmentMappings>();
            var noSimilarSegments = Substitute.For<ISegmentMappings>();
            var identifier = new BlairCognateIdentifier(pool, false, false, noIgnoredMappings, noSimilarSegments);

            // First pair: "hɛ.loʊ" / "hɛ.ɬa".
            var firstPair = varietyPair.WordPairs[0];
            identifier.UpdateCognacy(firstPair, wordAligner.Compute(firstPair));
            Assert.That(firstPair.AreCognatePredicted, Is.False);

            // Second pair: "gʊd" / "gud".
            var secondPair = varietyPair.WordPairs[1];
            identifier.UpdateCognacy(secondPair, wordAligner.Compute(secondPair));
            Assert.That(secondPair.AreCognatePredicted, Is.True);

            // Third pair: "bæ" / "pæ".
            var thirdPair = varietyPair.WordPairs[2];
            identifier.UpdateCognacy(thirdPair, wordAligner.Compute(thirdPair));
            Assert.That(thirdPair.AreCognatePredicted, Is.False);
        }
Exemplo n.º 27
0
            /// <summary>
            /// Builds a two-variety project containing one word per variety for a single meaning,
            /// generates the word pair, and creates the cognate identifier and aligner used by the tests.
            /// </summary>
            /// <param name="word1">Word form added to the first variety.</param>
            /// <param name="word2">Word form added to the second variety.</param>
            /// <param name="ignoreRegularInsertionDeletion">Forwarded to the <c>BlairCognateIdentifier</c> constructor.</param>
            /// <param name="regularConsEqual">Forwarded to the <c>BlairCognateIdentifier</c> constructor.</param>
            /// <param name="automaticRegularCorrThreshold">Forwarded to the <c>BlairCognateIdentifier</c> constructor.</param>
            public TestEnvironment(string word1, string word2, bool ignoreRegularInsertionDeletion = false, bool regularConsEqual = false, bool automaticRegularCorrThreshold = false)
            {
                _segmentPool = new SegmentPool();
                _project = TestHelpers.GetTestProject(_spanFactory, _segmentPool);

                _project.Meanings.Add(new Meaning("gloss1", "cat1"));
                _project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });

                var firstWord = new Word(word1, _project.Meanings[0]);
                _project.Varieties[0].Words.Add(firstWord);
                var secondWord = new Word(word2, _project.Meanings[0]);
                _project.Varieties[1].Words.Add(secondWord);

                var segmenter = new VarietySegmenter(_project.Segmenter);
                foreach (Variety current in _project.Varieties)
                {
                    segmenter.Process(current);
                }

                var varietyPair = new VarietyPair(_project.Varieties[0], _project.Varieties[1]);
                _project.VarietyPairs.Add(varietyPair);

                var pairGenerator = new SimpleWordPairGenerator(_segmentPool, _project, 0.3, "primary");
                pairGenerator.Process(varietyPair);

                // Start from an empty correspondence frequency distribution.
                varietyPair.CognateSoundCorrespondenceFrequencyDistribution = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

                var ignored = Substitute.For<ISegmentMappings>();
                var similarSegments = Substitute.For<ISegmentMappings>();
                _cognateIdentifier = new BlairCognateIdentifier(
                    _segmentPool,
                    ignoreRegularInsertionDeletion,
                    regularConsEqual,
                    automaticRegularCorrThreshold,
                    3,
                    ignored,
                    similarSegments);

                _aligner = new TestWordAligner(_segmentPool);
            }
Exemplo n.º 28
0
 /// <summary>
 /// Creates the message and stores the variety pair it refers to.
 /// </summary>
 /// <param name="varietyPair">The variety pair to store; <c>null</c> when omitted.</param>
 public ComparisonPerformedMessage(VarietyPair varietyPair = null)
 {
     this._varietyPair = varietyPair;
 }
Exemplo n.º 29
0
        /// <summary>
        /// Looks up the variety pair formed by the varieties of the first two selected words and
        /// sends a <c>SwitchViewMessage</c> targeting <c>VarietyPairsViewModel</c> with that pair and
        /// the selected meaning.
        /// </summary>
        private void ShowInVarietyPairs()
        {
            var pairsOfFirstVariety = _selectedWords[0].Variety.DomainVariety.VarietyPairs;
            VarietyPair selectedPair = pairsOfFirstVariety[_selectedWords[1].Variety.DomainVariety];

            var switchRequest = new SwitchViewMessage(typeof(VarietyPairsViewModel), selectedPair, _selectedMeaning.DomainMeaning);
            Messenger.Default.Send(switchRequest);
        }
Exemplo n.º 30
0
 /// <summary>
 /// Creates the message and stores the variety pair it refers to.
 /// </summary>
 /// <param name="varietyPair">The variety pair to store; <c>null</c> when omitted.</param>
 public PerformingComparisonMessage(VarietyPair varietyPair = null)
 {
     this._varietyPair = varietyPair;
 }
Exemplo n.º 31
0
        // The code below was taken from AnalysisService with minor tweaks (e.g., the project is not obtained from ProjectService).
        // TODO: Refactor this, and/or AnalysisService, so that the duplication can be removed.
        // (The duplication is currently necessary because AnalysisService lives in Cog.Application, which references parts
        // of WPF such as PresentationCore -- so Cog.Application cannot be used with Mono on Linux. Moving AnalysisService to a
        // different assembly, or moving the WPF-dependent code to a different assembly, would be a good solution.) - 2015-09 RM

        /// <summary>
        /// Runs the processors returned by <c>GetCompareProcessors</c> over a single variety pair.
        /// </summary>
        /// <param name="varietyPair">The variety pair to process.</param>
        public void Compare(VarietyPair varietyPair)
        {
            new Pipeline<VarietyPair>(GetCompareProcessors()).Process(varietyPair.ToEnumerable());
        }
Exemplo n.º 32
0
        /// <summary>
        /// Test fixture setup: builds the feature system, a segmenter that knows the symbols
        /// c, b, r and a, two single-word varieties ("car" and "bar"), and a variety pair whose
        /// sound change distributions contain one observation (initial segment of word 1 to
        /// initial segment of word 2).
        /// </summary>
        public void SetUp()
        {
            // Feature system: assembled on a local and published to the field once complete.
            var featureSystem = new FeatureSystem();
            featureSystem.Add(new SymbolicFeature("place",
                new FeatureSymbol("bilabial"),
                new FeatureSymbol("labiodental"),
                new FeatureSymbol("dental"),
                new FeatureSymbol("alveolar"),
                new FeatureSymbol("retroflex"),
                new FeatureSymbol("palato-alveolar"),
                new FeatureSymbol("palatal"),
                new FeatureSymbol("velar"),
                new FeatureSymbol("uvular"),
                new FeatureSymbol("pharyngeal"),
                new FeatureSymbol("glottal")));
            featureSystem.Add(new SymbolicFeature("manner",
                new FeatureSymbol("stop"),
                new FeatureSymbol("affricate"),
                new FeatureSymbol("fricative"),
                new FeatureSymbol("approximant"),
                new FeatureSymbol("trill"),
                new FeatureSymbol("flap"),
                new FeatureSymbol("close-vowel"),
                new FeatureSymbol("mid-vowel"),
                new FeatureSymbol("open-vowel")));
            featureSystem.Add(new SymbolicFeature("voice",
                new FeatureSymbol("voice+"),
                new FeatureSymbol("voice-")));
            featureSystem.Add(new SymbolicFeature("height",
                new FeatureSymbol("close"),
                new FeatureSymbol("near-close"),
                new FeatureSymbol("close-mid"),
                new FeatureSymbol("mid"),
                new FeatureSymbol("open-mid"),
                new FeatureSymbol("near-open"),
                new FeatureSymbol("open")));
            featureSystem.Add(new SymbolicFeature("backness",
                new FeatureSymbol("front"),
                new FeatureSymbol("near-front"),
                new FeatureSymbol("central"),
                new FeatureSymbol("near-back"),
                new FeatureSymbol("back")));
            featureSystem.Add(new SymbolicFeature("round",
                new FeatureSymbol("round+"),
                new FeatureSymbol("round-")));
            _featSys = featureSystem;

            _segmentPool = new SegmentPool();

            // Segmenter: three consonants, one vowel, plus boundary, modifier and joiner symbols.
            var segmenter = new Segmenter(_spanFactory);
            segmenter.Consonants.Add("c", FeatureStruct.New(_featSys).Symbol("palatal").Symbol("stop").Symbol("voice-").Value);
            segmenter.Consonants.Add("b", FeatureStruct.New(_featSys).Symbol("bilabial").Symbol("stop").Symbol("voice+").Value);
            segmenter.Consonants.Add("r", FeatureStruct.New(_featSys).Symbol("alveolar").Symbol("trill").Symbol("voice+").Value);
            segmenter.Vowels.Add("a", FeatureStruct.New(_featSys).Symbol("open").Symbol("front").Symbol("round-").Symbol("open-vowel").Symbol("voice+").Value);
            segmenter.Boundaries.Add("-");
            segmenter.Modifiers.Add("\u0303");
            segmenter.Modifiers.Add("\u0308");
            segmenter.Joiners.Add("\u0361");
            _segmenter = segmenter;

            var syllabifier = new SimpleSyllabifier(false, false);
            var sharedMeaning = new Meaning("test", null);

            // First variety: the single word "car".
            var firstVariety = new Variety("variety1");
            _word1 = new Word("car", sharedMeaning);
            _segmenter.Segment(_word1);
            firstVariety.Words.Add(_word1);
            syllabifier.Process(firstVariety);

            // Second variety: the single word "bar".
            var secondVariety = new Variety("variety2");
            _word2 = new Word("bar", sharedMeaning);
            _segmenter.Segment(_word2);
            secondVariety.Words.Add(_word2);
            syllabifier.Process(secondVariety);

            // Record one sound change: the first segment of word 1 corresponds to the first segment of word 2.
            var pair = new VarietyPair(firstVariety, secondVariety);
            pair.SoundChangeFrequencyDistribution = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            SoundContext initialContext = _word1.Shape.First.ToSoundContext(_segmentPool, Enumerable.Empty<SoundClass>());
            var correspondingSegment = _segmentPool.Get(_word2.Shape.First);
            pair.SoundChangeFrequencyDistribution[initialContext].Increment(correspondingSegment);
            pair.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
                pair.SoundChangeFrequencyDistribution,
                (context, frequencies) => new MaxLikelihoodProbabilityDistribution<Ngram<Segment>>(frequencies));

            // Register the pair with both varieties.
            firstVariety.VarietyPairs.VarietyPairAdded(pair);
            secondVariety.VarietyPairs.VarietyPairAdded(pair);
        }
Exemplo n.º 33
0
        /// <summary>
        /// Reads a tab-separated cognate-set table into <c>Project</c>.
        /// </summary>
        /// <remarks>
        /// The first line is a header: every column after the first is a meaning gloss, up to the
        /// first empty column. Each following line describes one variety: column 0 is the variety
        /// name and each later column is a string in which a '1' at position j marks membership in
        /// cognate set j of that column's meaning. Columns containing '?' are skipped, and an empty
        /// column ends the row. A variety pair is created between each new variety and every variety
        /// read before it; its <c>CognateCount</c> is incremented once per meaning in which both
        /// varieties share a set, and its <c>LexicalSimilarityScore</c> is that count divided by the
        /// smaller of the two varieties' counted meanings.
        /// </remarks>
        /// <param name="inputReader">Reader positioned at the start of the table.</param>
        private void ReadInput(TextReader inputReader)
        {
            var cognateSets = new Dictionary<Meaning, List<HashSet<Variety>>>();
            var meaningCounts = new Dictionary<Variety, int>();
            bool firstLine = true;

            foreach (string line in ReadLines(inputReader))
            {
                string[] tokens = line.Split('\t');
                if (firstLine)
                {
                    // Header row: one meaning per non-empty column after the first.
                    for (int i = 1; i < tokens.Length; i++)
                    {
                        string gloss = tokens[i];
                        if (string.IsNullOrEmpty(gloss))
                        {
                            break;
                        }
                        Project.Meanings.Add(new Meaning(gloss, null));
                    }
                }
                else
                {
                    var variety = new Variety(tokens[0]);
                    Project.Varieties.Add(variety);
                    foreach (Variety other in Project.Varieties.Where(v => v != variety))
                    {
                        Project.VarietyPairs.Add(new VarietyPair(other, variety));
                    }

                    int meaningCount = 0;
                    for (int i = 1; i < tokens.Length; i++)
                    {
                        string setStr = tokens[i];

                        if (string.IsNullOrEmpty(setStr))
                        {
                            break;
                        }

                        // ignore meanings that contain question marks
                        if (setStr.Contains('?'))
                        {
                            continue;
                        }

                        Meaning meaning = Project.Meanings[i - 1];
                        List<HashSet<Variety>> sets = cognateSets.GetOrCreate(meaning, () => new List<HashSet<Variety>>());

                        // Rows may carry set strings of different lengths for the same meaning, so the
                        // slot list is grown on demand instead of being sized from the first row only.
                        while (sets.Count < setStr.Length)
                        {
                            sets.Add(null);
                        }

                        // Varieties already counted as cognate with this one for the current meaning,
                        // so that a pair sharing several sets is counted only once per meaning.
                        var cognateVarieties = new HashSet<Variety>();
                        for (int j = 0; j < setStr.Length; j++)
                        {
                            if (sets[j] == null)
                            {
                                sets[j] = new HashSet<Variety>();
                            }

                            if (setStr[j] == '1')
                            {
                                foreach (Variety cognateVariety in sets[j].Except(cognateVarieties))
                                {
                                    cognateVarieties.Add(cognateVariety);
                                    VarietyPair pair = variety.VarietyPairs[cognateVariety];
                                    pair.CognateCount++;
                                }
                                sets[j].Add(variety);
                            }
                        }
                        meaningCount++;
                    }

                    meaningCounts[variety] = meaningCount;
                }
                firstLine = false;
            }

            foreach (VarietyPair pair in Project.VarietyPairs)
            {
                int meaningCount = Math.Min(meaningCounts[pair.Variety1], meaningCounts[pair.Variety2]);

                // Guard on the actual divisor: a variety whose columns were all empty or contained '?'
                // has a count of zero, and 0 / 0 in floating point would produce NaN.
                pair.LexicalSimilarityScore = meaningCount == 0 ? 0 : (double)pair.CognateCount / meaningCount;
            }
        }
Exemplo n.º 34
0
        /// <summary>
        /// Test fixture setup: builds the feature system, a segmenter that knows the symbols
        /// c, b, r and a, two single-word varieties ("car" and "bar"), and a variety pair whose
        /// cognate sound correspondence distributions contain one observation (initial segment of
        /// word 1 to initial segment of word 2).
        /// </summary>
        public void SetUp()
        {
            // Feature system: assembled on a local and published to the field once complete.
            var featureSystem = new FeatureSystem();
            featureSystem.Add(new SymbolicFeature("place",
                new FeatureSymbol("bilabial"),
                new FeatureSymbol("labiodental"),
                new FeatureSymbol("dental"),
                new FeatureSymbol("alveolar"),
                new FeatureSymbol("retroflex"),
                new FeatureSymbol("palato-alveolar"),
                new FeatureSymbol("palatal"),
                new FeatureSymbol("velar"),
                new FeatureSymbol("uvular"),
                new FeatureSymbol("pharyngeal"),
                new FeatureSymbol("glottal")));
            featureSystem.Add(new SymbolicFeature("manner",
                new FeatureSymbol("stop"),
                new FeatureSymbol("affricate"),
                new FeatureSymbol("fricative"),
                new FeatureSymbol("approximant"),
                new FeatureSymbol("trill"),
                new FeatureSymbol("flap"),
                new FeatureSymbol("close-vowel"),
                new FeatureSymbol("mid-vowel"),
                new FeatureSymbol("open-vowel")));
            featureSystem.Add(new SymbolicFeature("voice",
                new FeatureSymbol("voice+"),
                new FeatureSymbol("voice-")));
            featureSystem.Add(new SymbolicFeature("height",
                new FeatureSymbol("close"),
                new FeatureSymbol("near-close"),
                new FeatureSymbol("close-mid"),
                new FeatureSymbol("mid"),
                new FeatureSymbol("open-mid"),
                new FeatureSymbol("near-open"),
                new FeatureSymbol("open")));
            featureSystem.Add(new SymbolicFeature("backness",
                new FeatureSymbol("front"),
                new FeatureSymbol("near-front"),
                new FeatureSymbol("central"),
                new FeatureSymbol("near-back"),
                new FeatureSymbol("back")));
            featureSystem.Add(new SymbolicFeature("round",
                new FeatureSymbol("round+"),
                new FeatureSymbol("round-")));
            _featSys = featureSystem;

            _segmentPool = new SegmentPool();

            // Segmenter: three consonants, one vowel, plus boundary, modifier and joiner symbols.
            var segmenter = new Segmenter(_spanFactory);
            segmenter.Consonants.Add("c", FeatureStruct.New(_featSys).Symbol("palatal").Symbol("stop").Symbol("voice-").Value);
            segmenter.Consonants.Add("b", FeatureStruct.New(_featSys).Symbol("bilabial").Symbol("stop").Symbol("voice+").Value);
            segmenter.Consonants.Add("r", FeatureStruct.New(_featSys).Symbol("alveolar").Symbol("trill").Symbol("voice+").Value);
            segmenter.Vowels.Add("a", FeatureStruct.New(_featSys).Symbol("open").Symbol("front").Symbol("round-").Symbol("open-vowel").Symbol("voice+").Value);
            segmenter.Boundaries.Add("-");
            segmenter.Modifiers.Add("\u0303");
            segmenter.Modifiers.Add("\u0308");
            segmenter.Joiners.Add("\u0361");
            _segmenter = segmenter;

            var syllabifier = new SimpleSyllabifier(false, false);
            var sharedMeaning = new Meaning("test", null);

            // First variety: the single word "car".
            var firstVariety = new Variety("variety1");
            _word1 = new Word("car", sharedMeaning);
            _segmenter.Segment(_word1);
            firstVariety.Words.Add(_word1);
            syllabifier.Process(firstVariety);

            // Second variety: the single word "bar".
            var secondVariety = new Variety("variety2");
            _word2 = new Word("bar", sharedMeaning);
            _segmenter.Segment(_word2);
            secondVariety.Words.Add(_word2);
            syllabifier.Process(secondVariety);

            // Record one correspondence: the first segment of word 1 maps to the first segment of word 2.
            var pair = new VarietyPair(firstVariety, secondVariety);
            pair.CognateSoundCorrespondenceFrequencyDistribution = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
            SoundContext initialContext = _word1.Shape.First.ToSoundContext(_segmentPool, Enumerable.Empty<SoundClass>());
            var correspondingSegment = _segmentPool.Get(_word2.Shape.First);
            pair.CognateSoundCorrespondenceFrequencyDistribution[initialContext].Increment(correspondingSegment);
            pair.CognateSoundCorrespondenceProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
                pair.CognateSoundCorrespondenceFrequencyDistribution,
                (context, frequencies) => new MaxLikelihoodProbabilityDistribution<Ngram<Segment>>(frequencies));

            // Register the pair with both varieties.
            firstVariety.VarietyPairs.VarietyPairAdded(pair);
            secondVariety.VarietyPairs.VarietyPairAdded(pair);
        }
Exemplo n.º 35
0
        /// <summary>
        /// Scores a correspondence between nodes of two words using the sound change probabilities
        /// stored on the variety pair of the words' varieties.
        /// </summary>
        /// <param name="sequence1">Word that contains <paramref name="p1"/> and <paramref name="p2"/>.</param>
        /// <param name="p1">First node on the <paramref name="sequence1"/> side, or <c>null</c>.</param>
        /// <param name="p2">Second node on the <paramref name="sequence1"/> side, or <c>null</c>.</param>
        /// <param name="sequence2">Word that contains <paramref name="q1"/> and <paramref name="q2"/>.</param>
        /// <param name="q1">First node on the <paramref name="sequence2"/> side, or <c>null</c>.</param>
        /// <param name="q2">Second node on the <paramref name="sequence2"/> side, or <c>null</c>.</param>
        /// <returns>
        /// <c>MaxSoundChangeScore</c> multiplied by the correspondence probability and truncated to an
        /// integer; 0 when both words belong to the same variety or the variety pair has no sound
        /// change probability distribution.
        /// </returns>
        private int GetSoundChangeScore(Word sequence1, ShapeNode p1, ShapeNode p2, Word sequence2, ShapeNode q1, ShapeNode q2)
        {
            if (sequence1.Variety == sequence2.Variety)
            {
                return 0;
            }

            VarietyPair pair = sequence1.Variety.VarietyPairs[sequence2.Variety];
            if (pair.SoundChangeProbabilityDistribution == null)
            {
                return 0;
            }

            // When the first word belongs to the pair's second variety, exchange the p and q nodes.
            if (sequence1.Variety == pair.Variety2)
            {
                ShapeNode held = p1;
                p1 = q1;
                q1 = held;

                held = p2;
                p2 = q2;
                q2 = held;
            }

            // Target n-gram: empty when p1 is null, one segment for p1 alone, two segments for p1 and p2.
            Ngram<Segment> target;
            if (p1 != null)
            {
                Segment firstTarget = _segmentPool.GetExisting(p1);
                if (p2 != null)
                {
                    target = new Ngram<Segment>(firstTarget, _segmentPool.GetExisting(p2));
                }
                else
                {
                    target = firstTarget;
                }
            }
            else
            {
                target = new Ngram<Segment>();
            }

            // Corresponding n-gram, built the same way from q1 and q2.
            Ngram<Segment> corr;
            if (q1 != null)
            {
                Segment firstCorr = _segmentPool.GetExisting(q1);
                if (q2 != null)
                {
                    corr = new Ngram<Segment>(firstCorr, _segmentPool.GetExisting(q2));
                }
                else
                {
                    corr = firstCorr;
                }
            }
            else
            {
                corr = new Ngram<Segment>();
            }

            // Left environment: taken from p2 itself when p1 is null, otherwise from the node before p1.
            ShapeNode leftNode = p1 != null ? p1.GetPrev(NodeFilter) : p2;
            SoundClass leftEnv = null;
            if (leftNode != null)
            {
                SoundClass matchedLeft;
                if (_contextualSoundClasses.TryGetMatchingSoundClass(_segmentPool, leftNode, out matchedLeft))
                {
                    leftEnv = matchedLeft;
                }
            }

            // Right environment: the node after p2, or after p1 when p2 is null.
            ShapeNode lastTargetNode = p2 ?? p1;
            ShapeNode rightNode = lastTargetNode != null ? lastTargetNode.GetNext(NodeFilter) : null;
            SoundClass rightEnv = null;
            if (rightNode != null)
            {
                SoundClass matchedRight;
                if (_contextualSoundClasses.TryGetMatchingSoundClass(_segmentPool, rightNode, out matchedRight))
                {
                    rightEnv = matchedRight;
                }
            }

            var lhs = new SoundContext(leftEnv, target, rightEnv);

            // Use the pair's default probability when no distribution exists for this context.
            double prob;
            IProbabilityDistribution<Ngram<Segment>> contextDist;
            if (pair.SoundChangeProbabilityDistribution.TryGetProbabilityDistribution(lhs, out contextDist))
            {
                prob = contextDist[corr];
            }
            else
            {
                prob = pair.DefaultCorrespondenceProbability;
            }

            return (int)(MaxSoundChangeScore * prob);
        }