Exemple #1
0
            public TestEnvironment(string word1, string word2, bool ignoreRegularInsertionDeletion = false, bool regularConsEqual = false, bool automaticRegularCorrThreshold = false)
            {
                _segmentPool = new SegmentPool();
                _project     = TestHelpers.GetTestProject(_segmentPool);
                _project.Meanings.Add(new Meaning("gloss1", "cat1"));
                _project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
                _project.Varieties[0].Words.Add(new Word(word1, _project.Meanings[0]));
                _project.Varieties[1].Words.Add(new Word(word2, _project.Meanings[0]));

                var varSegementer = new VarietySegmenter(_project.Segmenter);

                foreach (Variety variety in _project.Varieties)
                {
                    varSegementer.Process(variety);
                }

                var vp = new VarietyPair(_project.Varieties[0], _project.Varieties[1]);

                _project.VarietyPairs.Add(vp);

                var wordPairGenerator = new SimpleWordPairGenerator(_segmentPool, _project, 0.3, "primary");

                wordPairGenerator.Process(vp);
                vp.CognateSoundCorrespondenceFrequencyDistribution = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >();

                var ignoredMappings         = Substitute.For <ISegmentMappings>();
                var similarSegmentsMappings = Substitute.For <ISegmentMappings>();

                _cognateIdentifier = new BlairCognateIdentifier(_segmentPool, ignoreRegularInsertionDeletion, regularConsEqual, automaticRegularCorrThreshold,
                                                                3, ignoredMappings, similarSegmentsMappings);

                _aligner = new TestWordAligner(_segmentPool);
            }
        public void Process()
        {
            var        segmentPool = new SegmentPool();
            CogProject project     = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[] { new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3") });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[] { new Word("hɛ.loʊ", project.Meanings[0]), new Word("gan", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2]), new Word("ban", project.Meanings[2]) });
            project.Varieties[1].Words.AddRange(new[] { new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("tan", project.Meanings[1]), new Word("pæ", project.Meanings[2]) });
            project.WordAligners["primary"] = new TestWordAligner(segmentPool);
            var cognateIdentifier = Substitute.For <ICognateIdentifier>();

            cognateIdentifier.When(ci => ci.UpdateCognicity(Arg.Any <WordPair>(), Arg.Any <IWordAlignerResult>())).Do(ci =>
            {
                var wordPair = ci.Arg <WordPair>();
                if ((wordPair.Word1.StrRep == "hɛ.loʊ" && wordPair.Word2.StrRep == "hɛ.ɬa") ||
                    (wordPair.Word1.StrRep == "gʊd" && wordPair.Word2.StrRep == "tan") ||
                    (wordPair.Word1.StrRep == "bæ" && wordPair.Word2.StrRep == "pæ") ||
                    (wordPair.Word1.StrRep == "ban" && wordPair.Word2.StrRep == "pæ"))
                {
                    wordPair.AreCognatePredicted = true;
                    wordPair.CognicityScore      = 1.0;
                }
            });
            project.CognateIdentifiers["primary"] = cognateIdentifier;

            var varSegementer = new VarietySegmenter(project.Segmenter);

            foreach (Variety variety in project.Varieties)
            {
                varSegementer.Process(variety);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);

            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new CognicityWordPairGenerator(segmentPool, project, 0.3, "primary", "primary");

            wordPairGenerator.Process(vp);

            WordPair wp = vp.WordPairs[0];

            Assert.That(wp.Word1.StrRep, Is.EqualTo("hɛ.loʊ"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("hɛ.ɬa"));

            wp = vp.WordPairs[1];
            Assert.That(wp.Word1.StrRep, Is.EqualTo("gʊd"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("tan"));

            wp = vp.WordPairs[2];
            Assert.That(wp.Word1.StrRep, Is.EqualTo("bæ"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("pæ"));
        }
        public void UpdateCognicity_RegularCorrespondences()
        {
            var segmentPool = new SegmentPool();

            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[] { new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3") });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[] { new Word("hɛ.lo", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2]) });
            project.Varieties[1].Words.AddRange(new[] { new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("pæ", project.Meanings[2]) });

            var varSegementer = new VarietySegmenter(project.Segmenter);

            foreach (Variety variety in project.Varieties)
            {
                varSegementer.Process(variety);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);

            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary");

            wordPairGenerator.Process(vp);

            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("l"))].Increment(segmentPool.GetExisting("ɬ"), 3);
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("b"))].Increment(segmentPool.GetExisting("p"), 3);

            var aligner         = new TestWordAligner(segmentPool);
            var ignoredMappings = Substitute.For <ISegmentMappings>();

            ignoredMappings.IsMapped(Arg.Any <ShapeNode>(), Arg.Any <Ngram <Segment> >(), Arg.Any <ShapeNode>(), Arg.Any <ShapeNode>(), Arg.Any <Ngram <Segment> >(), Arg.Any <ShapeNode>()).Returns(false);
            var similarSegmentsMappings = Substitute.For <ISegmentMappings>();

            similarSegmentsMappings.IsMapped(Arg.Any <ShapeNode>(), segmentPool.GetExisting("b"), Arg.Any <ShapeNode>(), Arg.Any <ShapeNode>(), segmentPool.GetExisting("p"), Arg.Any <ShapeNode>()).Returns(true);
            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);
            var wp = vp.WordPairs[0];

            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[1];
            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[2];
            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);
        }
        public void Process()
        {
            var segmentPool = new SegmentPool();
            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);
            project.Meanings.AddRange(new[] {new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3")});
            project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
            project.Varieties[0].Words.AddRange(new[] {new Word("hɛ.loʊ", project.Meanings[0]), new Word("gan", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2]), new Word("ban", project.Meanings[2])});
            project.Varieties[1].Words.AddRange(new[] {new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("tan", project.Meanings[1]), new Word("pæ", project.Meanings[2])});
            project.WordAligners["primary"] = new TestWordAligner(segmentPool);
            var cognateIdentifier = Substitute.For<ICognateIdentifier>();
            cognateIdentifier.When(ci => ci.UpdateCognacy(Arg.Any<WordPair>(), Arg.Any<IWordAlignerResult>())).Do(ci =>
                {
                    var wordPair = ci.Arg<WordPair>();
                    if ((wordPair.Word1.StrRep == "hɛ.loʊ" && wordPair.Word2.StrRep == "hɛ.ɬa")
                        || (wordPair.Word1.StrRep == "gʊd" && wordPair.Word2.StrRep == "tan")
                        || (wordPair.Word1.StrRep == "bæ" && wordPair.Word2.StrRep == "pæ")
                        || (wordPair.Word1.StrRep == "ban" && wordPair.Word2.StrRep == "pæ"))
                    {
                        wordPair.AreCognatePredicted = true;
                        wordPair.CognacyScore = 1.0;
                    }
                });
            project.CognateIdentifiers["primary"] = cognateIdentifier;

            var varSegementer = new VarietySegmenter(project.Segmenter);
            foreach (Variety variety in project.Varieties)
                varSegementer.Process(variety);

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new CognacyWordPairGenerator(segmentPool, project, 0.3, "primary", "primary");
            wordPairGenerator.Process(vp);

            WordPair wp = vp.WordPairs[0];
            Assert.That(wp.Word1.StrRep, Is.EqualTo("hɛ.loʊ"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("hɛ.ɬa"));

            wp = vp.WordPairs[1];
            Assert.That(wp.Word1.StrRep, Is.EqualTo("gʊd"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("tan"));

            wp = vp.WordPairs[2];
            Assert.That(wp.Word1.StrRep, Is.EqualTo("bæ"));
            Assert.That(wp.Word2.StrRep, Is.EqualTo("pæ"));
        }
        public void UpdateCognicity_NoSimilarSegments()
        {
            var segmentPool = new SegmentPool();

            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);

            project.Meanings.AddRange(new[] { new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3") });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[] { new Word("hɛ.loʊ", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2]) });
            project.Varieties[1].Words.AddRange(new[] { new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("pæ", project.Meanings[2]) });

            var varSegementer = new VarietySegmenter(project.Segmenter);

            foreach (Variety variety in project.Varieties)
            {
                varSegementer.Process(variety);
            }

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);

            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary");

            wordPairGenerator.Process(vp);

            var aligner                 = new TestWordAligner(segmentPool);
            var ignoredMappings         = Substitute.For <ISegmentMappings>();
            var similarSegmentsMappings = Substitute.For <ISegmentMappings>();
            var cognateIdentifier       = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);
            var wp = vp.WordPairs[0];

            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.False);

            wp = vp.WordPairs[1];
            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[2];
            cognateIdentifier.UpdateCognicity(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.False);
        }
        public void UpdateCognacy_RegularCorrespondences()
        {
            var segmentPool = new SegmentPool();

            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);
            project.Meanings.AddRange(new[] {new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3")});
            project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
            project.Varieties[0].Words.AddRange(new[] {new Word("hɛ.lo", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2])});
            project.Varieties[1].Words.AddRange(new[] {new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("pæ", project.Meanings[2])});

            var varSegementer = new VarietySegmenter(project.Segmenter);
            foreach (Variety variety in project.Varieties)
                varSegementer.Process(variety);

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary");
            wordPairGenerator.Process(vp);

            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("l"))].Increment(segmentPool.GetExisting("ɬ"), 3);
            vp.SoundChangeFrequencyDistribution[new SoundContext(segmentPool.GetExisting("b"))].Increment(segmentPool.GetExisting("p"), 3);

            var aligner = new TestWordAligner(segmentPool);
            var ignoredMappings = Substitute.For<ISegmentMappings>();
            ignoredMappings.IsMapped(Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), Arg.Any<Ngram<Segment>>(), Arg.Any<ShapeNode>()).Returns(false);
            var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
            similarSegmentsMappings.IsMapped(Arg.Any<ShapeNode>(), segmentPool.GetExisting("b"), Arg.Any<ShapeNode>(), Arg.Any<ShapeNode>(), segmentPool.GetExisting("p"), Arg.Any<ShapeNode>()).Returns(true);
            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);
            var wp = vp.WordPairs[0];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[1];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[2];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);
        }
Exemple #7
0
        private CogProject SetupProjectWithWords(TestEnvironment env)
        {
            var        segmentPool = new SegmentPool();
            CogProject project     = TestHelpers.GetTestProject(segmentPool);

            project.Meanings.AddRange(new[] { new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3") });
            project.Varieties.AddRange(new[] { new Variety("variety1"), new Variety("variety2") });
            project.Varieties[0].Words.AddRange(new[] { new Word("hello", project.Meanings[0]), new Word("good", project.Meanings[1]), new Word("bad", project.Meanings[2]) });
            project.Varieties[1].Words.AddRange(new[] { new Word("help", project.Meanings[0]), new Word("google search", project.Meanings[1]), new Word("goofy", project.Meanings[2]) });

            var segmenter        = new Segmenter();
            var varietySegmenter = new VarietySegmenter(project.Segmenter);

            foreach (Variety variety in project.Varieties)
            {
                varietySegmenter.Process(variety);
            }

            env.OpenProject(project);
            return(project);
        }
        public void UpdateCognacy_NoSimilarSegments()
        {
            var segmentPool = new SegmentPool();

            CogProject project = TestHelpers.GetTestProject(_spanFactory, segmentPool);
            project.Meanings.AddRange(new[] {new Meaning("gloss1", "cat1"), new Meaning("gloss2", "cat2"), new Meaning("gloss3", "cat3")});
            project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
            project.Varieties[0].Words.AddRange(new[] {new Word("hɛ.loʊ", project.Meanings[0]), new Word("gʊd", project.Meanings[1]), new Word("bæ", project.Meanings[2])});
            project.Varieties[1].Words.AddRange(new[] {new Word("hɛ.ɬa", project.Meanings[0]), new Word("gud", project.Meanings[1]), new Word("pæ", project.Meanings[2])});

            var varSegementer = new VarietySegmenter(project.Segmenter);
            foreach (Variety variety in project.Varieties)
                varSegementer.Process(variety);

            var vp = new VarietyPair(project.Varieties[0], project.Varieties[1]);
            project.VarietyPairs.Add(vp);

            var wordPairGenerator = new SimpleWordPairGenerator(segmentPool, project, 0.3, "primary");
            wordPairGenerator.Process(vp);

            var aligner = new TestWordAligner(segmentPool);
            var ignoredMappings = Substitute.For<ISegmentMappings>();
            var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
            var cognateIdentifier = new BlairCognateIdentifier(segmentPool, false, false, ignoredMappings, similarSegmentsMappings);
            var wp = vp.WordPairs[0];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.False);

            wp = vp.WordPairs[1];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.True);

            wp = vp.WordPairs[2];
            cognateIdentifier.UpdateCognacy(wp, aligner.Compute(wp));
            Assert.That(wp.AreCognatePredicted, Is.False);
        }
            public TestEnvironment(string word1, string word2, bool ignoreRegularInsertionDeletion = false, bool regularConsEqual = false, bool automaticRegularCorrThreshold = false)
            {
                _segmentPool = new SegmentPool();
                _project = TestHelpers.GetTestProject(_spanFactory, _segmentPool);
                _project.Meanings.Add(new Meaning("gloss1", "cat1"));
                _project.Varieties.AddRange(new[] {new Variety("variety1"), new Variety("variety2")});
                _project.Varieties[0].Words.Add(new Word(word1, _project.Meanings[0]));
                _project.Varieties[1].Words.Add(new Word(word2, _project.Meanings[0]));

                var varSegementer = new VarietySegmenter(_project.Segmenter);
                foreach (Variety variety in _project.Varieties)
                    varSegementer.Process(variety);

                var vp = new VarietyPair(_project.Varieties[0], _project.Varieties[1]);
                _project.VarietyPairs.Add(vp);

                var wordPairGenerator = new SimpleWordPairGenerator(_segmentPool, _project, 0.3, "primary");
                wordPairGenerator.Process(vp);
                vp.CognateSoundCorrespondenceFrequencyDistribution = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

                var ignoredMappings = Substitute.For<ISegmentMappings>();
                var similarSegmentsMappings = Substitute.For<ISegmentMappings>();
                _cognateIdentifier = new BlairCognateIdentifier(_segmentPool, ignoreRegularInsertionDeletion, regularConsEqual, automaticRegularCorrThreshold,
                    3, ignoredMappings, similarSegmentsMappings);

                _aligner = new TestWordAligner(_segmentPool);
            }