/// <summary>
/// Creates the result of aligning several words at once. The supplied words are copied
/// into a read-only list, handed to a <c>MultipleAlignmentAlgorithm</c> together with the
/// scorer, and the algorithm is run before the constructor returns.
/// </summary>
/// <param name="wordAligner">Aligner forwarded to the base class constructor.</param>
/// <param name="scorer">Pairwise scorer passed to the alignment algorithm.</param>
/// <param name="words">Words to align; enumerated once via <c>ToArray</c>.</param>
public MultipleWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, IEnumerable<Word> words)
    : base(wordAligner)
{
    Word[] snapshot = words.ToArray();
    _words = new ReadOnlyList<Word>(snapshot);

    var algorithm = new MultipleAlignmentAlgorithm<Word, ShapeNode>(scorer, _words, GetNodes);
    _algorithm = algorithm;
    _algorithm.Compute();
}
/// <summary>
/// Recomputes <c>IsSelected</c> on every aligned node of every word pair in
/// <paramref name="wordPairs"/> and rebuilds <c>SelectedCorrespondenceWordPairs</c>.
/// A node is selected when a sound change is selected and both the sound context built from
/// row 0 of the alignment and the n-gram in row 1 of the same column equal that sound change's
/// left-hand side and correspondence. A word pair is listed when at least one node is selected.
/// </summary>
private void UpdateSelectedChangeWordPairs(WordPairsViewModel wordPairs)
{
    IWordAligner primaryAligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    wordPairs.SelectedCorrespondenceWordPairs.Clear();

    foreach (WordPairViewModel pairVm in wordPairs.WordPairs)
    {
        bool anyNodeSelected = false;

        foreach (AlignedNodeViewModel alignedNode in pairVm.AlignedNodes)
        {
            if (_selectedSoundChange != null)
            {
                SoundContext context = pairVm.DomainAlignment.ToSoundContext(_segmentPool, 0, alignedNode.Column, primaryAligner.ContextualSoundClasses);
                Ngram<Segment> correspondence = pairVm.DomainAlignment[1, alignedNode.Column].ToNgram(_segmentPool);

                alignedNode.IsSelected = context.Equals(_selectedSoundChange.DomainSoundChangeLhs)
                    && correspondence.Equals(_selectedSoundChange.DomainCorrespondence);

                // Read the flag back from the node, exactly as the selection test requires.
                anyNodeSelected |= alignedNode.IsSelected;
            }
            else
            {
                // Nothing is selected: clear any previous highlight.
                alignedNode.IsSelected = false;
            }
        }

        if (anyNodeSelected)
        {
            wordPairs.SelectedCorrespondenceWordPairs.Add(pairVm);
        }
    }
}
/// <summary>
/// Builds the view model for a word pair: wraps the meaning and both varieties, computes the
/// first alignment returned by <paramref name="aligner"/>, and creates one node view model for
/// the prefixes, one per alignment column, and one for the suffixes.
/// </summary>
/// <param name="aligner">Aligner used to compute the alignment of <paramref name="wordPair"/>.</param>
/// <param name="wordPair">The domain word pair being presented.</param>
/// <param name="areVarietiesInOrder">Stored as-is in <c>_areVarietiesInOrder</c>.</param>
public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
{
    _wordPair = wordPair;
    _areVarietiesInOrder = areVarietiesInOrder;
    _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
    _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
    _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

    _alignment = aligner.Compute(_wordPair).GetAlignments().First();
    _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

    var columnNodes = new List<AlignedNodeViewModel>();
    for (int column = 0; column < _alignment.ColumnCount; column++)
    {
        // Columns beyond the end of the stored notes get no note.
        string note = column < _wordPair.AlignmentNotes.Count ? _wordPair.AlignmentNotes[column] : null;
        columnNodes.Add(new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note));
    }

    _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
    _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(columnNodes);
    _showInMultipleWordAlignmentCommand = new RelayCommand(ShowInMultipleWordAlignment);
}
/// <summary>
/// Initializes a multiple-word alignment result. The words are materialized into an array,
/// wrapped in a read-only list, and the multiple alignment algorithm is constructed and
/// computed immediately.
/// </summary>
/// <param name="wordAligner">Aligner passed through to the base constructor.</param>
/// <param name="scorer">Scorer given to the <c>MultipleAlignmentAlgorithm</c>.</param>
/// <param name="words">The words to align.</param>
public MultipleWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, IEnumerable<Word> words)
    : base(wordAligner)
{
    Word[] materializedWords = words.ToArray();
    _words = new ReadOnlyList<Word>(materializedWords);

    var multipleAlignment = new MultipleAlignmentAlgorithm<Word, ShapeNode>(scorer, _words, GetNodes);
    _algorithm = multipleAlignment;
    _algorithm.Compute();
}
/// <summary>
/// Creates the view model for <paramref name="wordPair"/>. The first alignment produced by
/// <paramref name="aligner"/> is stored, and node view models are created for the prefixes,
/// for each alignment column (with its alignment note when one exists), and for the suffixes.
/// </summary>
/// <param name="aligner">Aligner used to align the word pair.</param>
/// <param name="wordPair">The word pair to present.</param>
/// <param name="areVarietiesInOrder">Stored as-is in <c>_areVarietiesInOrder</c>.</param>
public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
{
    _wordPair = wordPair;
    _areVarietiesInOrder = areVarietiesInOrder;

    _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
    _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
    _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

    IWordAlignerResult alignerResult = aligner.Compute(_wordPair);
    _alignment = alignerResult.GetAlignments().First();

    _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

    var builtNodes = new List<AlignedNodeViewModel>();
    for (int column = 0; column < _alignment.ColumnCount; column++)
    {
        // The note index tracks the column index one-to-one.
        bool hasNote = column < _wordPair.AlignmentNotes.Count;
        string note = hasNote ? _wordPair.AlignmentNotes[column] : null;

        var node = new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note);
        builtNodes.Add(node);
    }

    _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
    _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(builtNodes);
}
/// <summary>
/// Rebuilds a <c>SoundContext</c> from this surrogate. The left and right environments are
/// looked up by name among the primary word aligner's contextual sound classes (a null
/// environment name stays null), and the target n-gram is built by resolving each entry of
/// <c>_target</c> through <c>segmentPool.GetExisting</c>.
/// </summary>
/// <remarks>
/// <c>First</c> is used for the lookups, so a non-null environment name with no matching
/// sound class throws.
/// </remarks>
public SoundContext ToSoundContext(CogProject project, SegmentPool segmentPool)
{
    IWordAligner primaryAligner = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

    SoundClass left = null;
    if (LeftEnvironment != null)
    {
        left = primaryAligner.ContextualSoundClasses.First(soundClass => soundClass.Name == LeftEnvironment);
    }

    SoundClass right = null;
    if (RightEnvironment != null)
    {
        right = primaryAligner.ContextualSoundClasses.First(soundClass => soundClass.Name == RightEnvironment);
    }

    var target = new Ngram<Segment>(_target.Select(segmentPool.GetExisting));
    return new SoundContext(left, target, right);
}
/// <summary>
/// Adds one observation per alignment column to <paramref name="cognateCorrCounts"/>: the
/// condition is the sound context built from row 0 of the column, and the sample is the
/// n-gram found in row 1 of the same column.
/// </summary>
private void UpdateCognateCorrespondenceCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> cognateCorrCounts, Alignment<Word, ShapeNode> alignment)
{
    int column = 0;
    while (column < alignment.ColumnCount)
    {
        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
        Ngram<Segment> correspondence = alignment[1, column].ToNgram(_segmentPool);

        cognateCorrCounts[context].Increment(correspondence);
        column++;
    }
}
/// <summary>
/// Rebuilds a <c>VarietyPair</c> from this surrogate: copies the similarity scores and default
/// correspondence probability, recreates the word pairs, the sound change frequency and
/// probability distributions, and the sound correspondence collections.
/// </summary>
/// <param name="segmentPool">Pool used to resolve stored segment strings via <c>GetExisting</c>.</param>
/// <param name="project">Project supplying the varieties and the primary word aligner.</param>
/// <returns>The reconstructed variety pair.</returns>
public VarietyPair ToVarietyPair(SegmentPool segmentPool, CogProject project)
{
    var varietyPair = new VarietyPair(project.Varieties[Variety1], project.Varieties[Variety2]);
    varietyPair.PhoneticSimilarityScore = PhoneticSimilarityScore;
    varietyPair.LexicalSimilarityScore = LexicalSimilarityScore;
    varietyPair.DefaultCorrespondenceProbability = DefaultCorrespondenceProbability;

    // Each surrogate is converted at most once; the lookup is reused below for correspondences.
    var wordPairLookup = new Dictionary<WordPairSurrogate, WordPair>();
    varietyPair.WordPairs.AddRange(
        _wordPairs.Select(surrogate => wordPairLookup.GetValue(surrogate, () => surrogate.ToWordPair(project, varietyPair))));

    var frequencies = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    foreach (KeyValuePair<SoundContextSurrogate, Tuple<string[], int>[]> entry in _soundChanges)
    {
        SoundContext context = entry.Key.ToSoundContext(project, segmentPool);
        FrequencyDistribution<Ngram<Segment>> distribution = frequencies[context];

        foreach (Tuple<string[], int> sample in entry.Value)
        {
            Ngram<Segment> correspondence;
            if (sample.Item1 == null)
            {
                // A missing segment array is stored as an empty n-gram.
                correspondence = new Ngram<Segment>();
            }
            else
            {
                correspondence = new Ngram<Segment>(sample.Item1.Select(segmentPool.GetExisting));
            }

            distribution.Increment(correspondence, sample.Item2);
        }
    }
    varietyPair.SoundChangeFrequencyDistribution = frequencies;

    IWordAligner aligner = project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    int segmentCount = varietyPair.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;

    // Number of possible correspondences handed to the Witten-Bell distribution.
    int possCorrCount;
    if (aligner.ExpansionCompressionEnabled)
    {
        possCorrCount = (segmentCount * segmentCount) + segmentCount + 1;
    }
    else
    {
        possCorrCount = segmentCount + 1;
    }

    varietyPair.SoundChangeProbabilityDistribution = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
        frequencies,
        (sc, freqDist) => new WittenBellProbabilityDistribution<Ngram<Segment>>(freqDist, possCorrCount));

    foreach (KeyValuePair<string, List<SoundCorrespondenceSurrogate>> collection in _soundCorrespondenceCollections)
    {
        if (collection.Value == null)
        {
            continue;
        }

        // NOTE(review): a key other than "onset"/"nucleus"/"coda" leaves the position null,
        // which is then used as the lookup key below - confirm that cannot occur.
        FeatureSymbol position = null;
        if (collection.Key == "onset")
        {
            position = CogFeatureSystem.Onset;
        }
        else if (collection.Key == "nucleus")
        {
            position = CogFeatureSystem.Nucleus;
        }
        else if (collection.Key == "coda")
        {
            position = CogFeatureSystem.Coda;
        }

        varietyPair.SoundCorrespondenceCollections[position].AddRange(
            collection.Value.Select(surrogate => surrogate.ToSoundCorrespondence(segmentPool, wordPairLookup)));
    }

    return varietyPair;
}
/// <summary>
/// Creates the result of aligning two words. Both words are stored in a read-only list, a
/// <c>PairwiseAlignmentAlgorithm</c> is configured from <paramref name="settings"/>
/// (expansion/compression flag and mode) and run before the constructor returns.
/// </summary>
/// <param name="wordAligner">Aligner forwarded to the base class constructor.</param>
/// <param name="scorer">Scorer passed to the alignment algorithm.</param>
/// <param name="settings">Source of <c>ExpansionCompressionEnabled</c> and <c>Mode</c>.</param>
/// <param name="word1">First word of the pair.</param>
/// <param name="word2">Second word of the pair.</param>
public PairwiseWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, WordPairAlignerSettings settings, Word word1, Word word2)
    : base(wordAligner)
{
    _words = new ReadOnlyList<Word>(new[] { word1, word2 });

    var algorithm = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
    algorithm.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
    algorithm.Mode = settings.Mode;

    _algorithm = algorithm;
    _algorithm.Compute();
}
/// <summary>
/// Serializes an <c>SCAAlign</c> component into <paramref name="elem"/>: first its settings,
/// then a <c>RelevantFeatures</c> element holding one <c>RelevantFeature</c> per feature weight
/// (attributes <c>ref</c>, <c>weight</c>, <c>vowel</c>, <c>consonant</c>) with one
/// <c>RelevantValue</c> child (<c>ref</c>, <c>metric</c>) per possible symbol of the feature.
/// </summary>
/// <param name="component">The aligner to save; cast to <c>SCAAlign</c>.</param>
/// <param name="elem">Element that receives the serialized content.</param>
public override void Save(IWordAligner component, XElement elem)
{
    var scaAlign = (SCAAlign) component;
    SaveSettings(scaAlign.Settings, elem);

    var relevantFeatures =
        from featureWeight in scaAlign.FeatureWeights
        select new XElement(ConfigManager.Cog + "RelevantFeature",
            new XAttribute("ref", featureWeight.Key.ID),
            new XAttribute("weight", featureWeight.Value),
            new XAttribute("vowel", scaAlign.RelevantVowelFeatures.Contains(featureWeight.Key)),
            new XAttribute("consonant", scaAlign.RelevantConsonantFeatures.Contains(featureWeight.Key)),
            (from symbol in featureWeight.Key.PossibleSymbols
             select new XElement(ConfigManager.Cog + "RelevantValue",
                 new XAttribute("ref", symbol.ID),
                 new XAttribute("metric", scaAlign.ValueMetrics[symbol]))));

    elem.Add(new XElement(ConfigManager.Cog + "RelevantFeatures", relevantFeatures));
}
/// <summary>
/// Initializes a pairwise alignment result for <paramref name="word1"/> and
/// <paramref name="word2"/>. The pairwise algorithm receives the expansion/compression flag
/// and the mode from <paramref name="settings"/> and is computed immediately.
/// </summary>
/// <param name="wordAligner">Aligner passed through to the base constructor.</param>
/// <param name="scorer">Scorer given to the <c>PairwiseAlignmentAlgorithm</c>.</param>
/// <param name="settings">Aligner settings copied onto the algorithm.</param>
/// <param name="word1">First word to align.</param>
/// <param name="word2">Second word to align.</param>
public PairwiseWordAlignerResult(IWordAligner wordAligner, IPairwiseAlignmentScorer<Word, ShapeNode> scorer, WordPairAlignerSettings settings, Word word1, Word word2)
    : base(wordAligner)
{
    var alignedWords = new[] { word1, word2 };
    _words = new ReadOnlyList<Word>(alignedWords);

    var pairwiseAlgorithm = new PairwiseAlignmentAlgorithm<Word, ShapeNode>(scorer, word1, word2, GetNodes);
    pairwiseAlgorithm.ExpansionCompressionEnabled = settings.ExpansionCompressionEnabled;
    pairwiseAlgorithm.Mode = settings.Mode;
    _algorithm = pairwiseAlgorithm;

    _algorithm.Compute();
}
/// <summary>
/// Serializes an <c>Aline</c> component into <paramref name="elem"/>: its settings, followed by
/// a <c>RelevantFeatures</c> element with one <c>RelevantFeature</c> child per feature weight
/// and, below each, one <c>RelevantValue</c> child per possible symbol of that feature.
/// </summary>
/// <param name="component">The aligner to save; cast to <c>Aline</c>.</param>
/// <param name="elem">Element that receives the serialized content.</param>
public override void Save(IWordAligner component, XElement elem)
{
    var aline = (Aline)component;
    SaveSettings(aline.Settings, elem);

    var featureElements = aline.FeatureWeights.Select(featureWeight =>
    {
        var valueElements = featureWeight.Key.PossibleSymbols.Select(symbol =>
            new XElement(ConfigManager.Cog + "RelevantValue",
                new XAttribute("ref", symbol.ID),
                new XAttribute("metric", aline.ValueMetrics[symbol])));

        return new XElement(ConfigManager.Cog + "RelevantFeature",
            new XAttribute("ref", featureWeight.Key.ID),
            new XAttribute("weight", featureWeight.Value),
            new XAttribute("vowel", aline.RelevantVowelFeatures.Contains(featureWeight.Key)),
            new XAttribute("consonant", aline.RelevantConsonantFeatures.Contains(featureWeight.Key)),
            valueElements);
    });

    elem.Add(new XElement(ConfigManager.Cog + "RelevantFeatures", featureElements));
}
/// <summary>
/// Re-estimates the cognate sound correspondence probability distribution of
/// <paramref name="pair"/> from its frequency distribution and reports whether it has
/// converged, i.e. whether it matches the previously stored distribution: same number of
/// conditions, same number of samples per condition, and every probability within 0.0001.
/// When not converged, the new distribution and the default probability
/// (<c>1.0 / possCorrCount</c>) are stored on the pair.
/// </summary>
/// <returns><c>true</c> when the stored distribution was left unchanged.</returns>
private bool M(VarietyPair pair)
{
    IWordAligner aligner = _project.WordAligners[AlignerId];
    int segmentCount = pair.Variety2.SegmentFrequencyDistribution.ObservedSamples.Count;

    int possCorrCount;
    if (aligner.ExpansionCompressionEnabled)
    {
        possCorrCount = (segmentCount * segmentCount) + segmentCount + 1;
    }
    else
    {
        possCorrCount = segmentCount + 1;
    }

    var updated = new ConditionalProbabilityDistribution<SoundContext, Ngram<Segment>>(
        pair.CognateSoundCorrespondenceFrequencyDistribution,
        (sc, fd) => new WittenBellProbabilityDistribution<Ngram<Segment>>(fd, possCorrCount));

    // No previous estimate, or a different number of conditions, means "not converged".
    bool converged = pair.CognateSoundCorrespondenceProbabilityDistribution != null
        && pair.CognateSoundCorrespondenceProbabilityDistribution.Conditions.Count == updated.Conditions.Count;

    if (converged)
    {
        foreach (SoundContext lhs in updated.Conditions)
        {
            IProbabilityDistribution<Ngram<Segment>> current = updated[lhs];
            IProbabilityDistribution<Ngram<Segment>> previous;

            bool unchanged = pair.CognateSoundCorrespondenceProbabilityDistribution.TryGetProbabilityDistribution(lhs, out previous)
                && current.Samples.Count == previous.Samples.Count;

            if (unchanged)
            {
                foreach (Ngram<Segment> correspondence in current.Samples)
                {
                    if (Math.Abs(current[correspondence] - previous[correspondence]) > 0.0001)
                    {
                        unchanged = false;
                        break;
                    }
                }
            }

            if (!unchanged)
            {
                converged = false;
                break;
            }
        }
    }

    if (!converged)
    {
        pair.CognateSoundCorrespondenceProbabilityDistribution = updated;
        pair.DefaultSoundCorrespondenceProbability = 1.0 / possCorrCount;
    }

    return converged;
}
/// <summary>
/// Collects segment-to-segment sound correspondences from the cognate word pairs of
/// <paramref name="data"/>, grouped by syllable position (onset, nucleus, coda), and replaces
/// the contents of <c>CognateSoundCorrespondencesByPosition</c> with the result.
/// </summary>
/// <remarks>
/// Only alignment columns where both cells are non-null and hold exactly one node, both nodes
/// carry a syllable position, the positions are equal, and the two segments differ are counted.
/// </remarks>
public void Process(VarietyPair data)
{
    IWordAligner aligner = _project.WordAligners[_alignerId];

    var collectionsByPosition = new Dictionary<FeatureSymbol, SoundCorrespondenceCollection>();
    collectionsByPosition.Add(CogFeatureSystem.Onset, new SoundCorrespondenceCollection());
    collectionsByPosition.Add(CogFeatureSystem.Nucleus, new SoundCorrespondenceCollection());
    collectionsByPosition.Add(CogFeatureSystem.Coda, new SoundCorrespondenceCollection());

    foreach (WordPair wordPair in data.WordPairs.Where(wp => wp.Cognacy))
    {
        Alignment<Word, ShapeNode> alignment = aligner.Compute(wordPair).GetAlignments().First();

        for (int column = 0; column < alignment.ColumnCount; column++)
        {
            AlignmentCell<ShapeNode> leftCell = alignment[0, column];
            AlignmentCell<ShapeNode> rightCell = alignment[1, column];

            // Only one-to-one columns are considered.
            if (leftCell.IsNull || rightCell.IsNull || leftCell.Count != 1 || rightCell.Count != 1)
            {
                continue;
            }

            SymbolicFeatureValue leftPosition, rightPosition;
            if (!leftCell.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out leftPosition))
            {
                continue;
            }
            if (!rightCell.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out rightPosition))
            {
                continue;
            }

            bool samePosition = (FeatureSymbol)leftPosition == (FeatureSymbol)rightPosition;
            if (!samePosition)
            {
                continue;
            }

            Ngram<Segment> leftNgram = leftCell.ToNgram(_segmentPool);
            Ngram<Segment> rightNgram = rightCell.ToNgram(_segmentPool);
            Segment leftSegment = leftNgram.First;
            Segment rightSegment = rightNgram.First;

            // Identical segments are not a correspondence worth recording.
            if (leftSegment.Equals(rightSegment))
            {
                continue;
            }

            SoundCorrespondenceCollection correspondences = collectionsByPosition[(FeatureSymbol)leftPosition];
            SoundCorrespondence correspondence;
            if (!correspondences.TryGet(leftSegment, rightSegment, out correspondence))
            {
                correspondence = new SoundCorrespondence(leftSegment, rightSegment);
                correspondences.Add(correspondence);
            }

            correspondence.Frequency++;
            correspondence.WordPairs.Add(wordPair);
        }
    }

    foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in collectionsByPosition)
    {
        data.CognateSoundCorrespondencesByPosition[entry.Key].ReplaceAll(entry.Value);
    }
}
/// <summary>
/// Serializes an <c>Aline</c> component into <paramref name="elem"/>: its settings, a
/// <c>RelevantFeatures</c> element (one <c>RelevantFeature</c> per feature weight, each with
/// one <c>RelevantValue</c> per possible symbol), and the two setting elements
/// <c>SoundChangeScoringEnabled</c> and <c>SyllablePositionCostEnabled</c>.
/// </summary>
/// <param name="component">The aligner to save; cast to <c>Aline</c>.</param>
/// <param name="elem">Element that receives the serialized content.</param>
public override void Save(IWordAligner component, XElement elem)
{
    var aline = (Aline) component;
    SaveSettings(aline.Settings, elem);

    var relevantFeatures = new XElement(ConfigManager.Cog + "RelevantFeatures");
    relevantFeatures.Add(
        aline.FeatureWeights.Select(featureWeight =>
            new XElement(ConfigManager.Cog + "RelevantFeature",
                new XAttribute("ref", featureWeight.Key.ID),
                new XAttribute("weight", featureWeight.Value),
                new XAttribute("vowel", aline.RelevantVowelFeatures.Contains(featureWeight.Key)),
                new XAttribute("consonant", aline.RelevantConsonantFeatures.Contains(featureWeight.Key)),
                featureWeight.Key.PossibleSymbols.Select(symbol =>
                    new XElement(ConfigManager.Cog + "RelevantValue",
                        new XAttribute("ref", symbol.ID),
                        new XAttribute("metric", aline.ValueMetrics[symbol]))))));
    elem.Add(relevantFeatures);

    var soundChangeScoring = new XElement(ConfigManager.Cog + "SoundChangeScoringEnabled", aline.Settings.SoundChangeScoringEnabled);
    elem.Add(soundChangeScoring);

    var syllablePositionCost = new XElement(ConfigManager.Cog + "SyllablePositionCostEnabled", aline.Settings.SyllablePositionCostEnabled);
    elem.Add(syllablePositionCost);
}
/// <summary>
/// Adds one observation per alignment column to <paramref name="counts"/> (condition: sound
/// context built from row 0; sample: n-gram of row 1). Alignments whose normalized score is
/// below <c>_initialAlignmentThreshold</c> are ignored.
/// </summary>
private void UpdateCounts(IWordAligner aligner, ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>> counts, Alignment<Word, ShapeNode> alignment)
{
    // Keep the comparison as "<" so an unordered (NaN) score is still counted, as before.
    if (alignment.NormalizedScore < _initialAlignmentThreshold)
    {
        return;
    }

    int column = 0;
    while (column < alignment.ColumnCount)
    {
        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
        Ngram<Segment> correspondence = alignment[1, column].ToNgram(_segmentPool);

        counts[context].Increment(correspondence);
        column++;
    }
}
/// <summary>
/// Reads lines of two space-separated words from <paramref name="inputReader"/>, aligns each
/// pair with the project's "primary" word aligner, and writes the two words followed by the
/// raw or normalized alignment score to <paramref name="outputWriter"/>. In verbose mode the
/// alignment itself is written as well.
/// </summary>
/// <remarks>
/// If neither or both score kinds are selected, a warning is recorded and normalized scores
/// are used. Lines that do not contain exactly two words, or whose words fail to parse, are
/// recorded in <c>Errors</c> and skipped.
/// </remarks>
/// <returns>Always <c>ReturnCode.Okay</c>.</returns>
protected override ReturnCode DoWork(TextReader inputReader, TextWriter outputWriter, TextWriter errorWriter)
{
    if (!RawScores && !NormalizedScores)
    {
        Warnings.Add("Neither raw scores nor normalized scores were selected. Defaulting to normalized.");
        RawScores = false;
        NormalizedScores = true;
    }

    if (RawScores && NormalizedScores)
    {
        Warnings.Add("Please specify either raw or normalized scores, but not both. Defaulting to normalized.");
        RawScores = false;
        NormalizedScores = true;
    }

    SetupProject();
    Meaning meaning = MeaningFactory.Create();
    IWordAligner primaryAligner = Project.WordAligners["primary"];

    foreach (string line in ReadLines(inputReader))
    {
        string[] tokens = line.Split(' ');
        if (tokens.Length != 2)
        {
            Errors.Add(line, "Each line should have two space-separated words in it.");
            continue;
        }

        Word[] parsedWords = tokens.Select(token => ParseWordOnce(token, meaning, Project)).ToArray();
        bool parseFailed = parsedWords.Length != 2 || parsedWords.Any(w => w == null);
        if (parseFailed)
        {
            Errors.Add(line, "One or more of this line's words failed to parse. Successfully parsed words: {0}", string.Join(", ", parsedWords.Where(w => w != null).Select(w => w.StrRep)));
            continue;
        }

        Alignment<Word, ShapeNode> alignment = primaryAligner.Compute(parsedWords[0], parsedWords[1]).GetAlignments().First();

        outputWriter.WriteLine(
            "{0} {1} {2}",
            parsedWords[0].StrRep,
            parsedWords[1].StrRep,
            RawScores ? alignment.RawScore : alignment.NormalizedScore);

        if (Verbose)
        {
            outputWriter.Write(alignment.ToString(Enumerable.Empty<string>()));
            outputWriter.WriteLine();
        }
    }

    return ReturnCode.Okay;
}
/// <summary>
/// Rebuilds the word pairs of <paramref name="varietyPair"/> and its sound change frequency
/// distribution. For every meaning of variety 1, words with an empty shape are ignored; when
/// each variety has exactly one word they form the pair, otherwise the combination with the
/// highest normalized alignment score is chosen. Each chosen pair's alignment is passed to
/// <c>UpdateCounts</c>.
/// </summary>
public void Process(VarietyPair varietyPair)
{
    IWordAligner aligner = _project.WordAligners[_alignerID];
    varietyPair.WordPairs.Clear();

    var counts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();

    foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
    {
        Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();

        // A meaning needs at least one usable word on each side.
        if (candidates1.Length == 0 || candidates2.Length == 0)
        {
            continue;
        }

        if (candidates1.Length == 1 && candidates2.Length == 1)
        {
            WordPair onlyPair = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
            Alignment<Word, ShapeNode> onlyAlignment = aligner.Compute(onlyPair).GetAlignments().First();
            onlyPair.PhoneticSimilarityScore = onlyAlignment.NormalizedScore;
            UpdateCounts(aligner, counts, onlyAlignment);
        }
        else
        {
            WordPair bestPair = null;
            Alignment<Word, ShapeNode> bestAlignment = null;

            foreach (Word first in candidates1)
            {
                foreach (Word second in candidates2)
                {
                    Alignment<Word, ShapeNode> candidate = aligner.Compute(first, second).GetAlignments().First();
                    double candidateScore = candidate.NormalizedScore;

                    if (bestPair == null || candidateScore > bestPair.PhoneticSimilarityScore)
                    {
                        bestPair = new WordPair(first, second);
                        bestPair.PhoneticSimilarityScore = candidateScore;
                        bestAlignment = candidate;
                    }
                }
            }

            varietyPair.WordPairs.Add(bestPair);
            UpdateCounts(aligner, counts, bestAlignment);
        }
    }

    varietyPair.SoundChangeFrequencyDistribution = counts;
}
/// <summary>
/// Writes the given word pairs, ordered by descending phonetic similarity score. Each entry
/// consists of the gloss (plus the category in parentheses when it is non-empty), the
/// alignment text with the pair's alignment notes, and a "Similarity" line. Entries are
/// separated by an empty line.
/// </summary>
private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
{
    bool separatorNeeded = false;

    foreach (WordPair pair in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
    {
        if (separatorNeeded)
        {
            writer.WriteLine();
        }

        Alignment<Word, ShapeNode> alignment = aligner.Compute(pair).GetAlignments().First();

        writer.Write(pair.Word1.Meaning.Gloss);
        bool hasCategory = !string.IsNullOrEmpty(pair.Word1.Meaning.Category);
        if (hasCategory)
        {
            writer.Write(" ({0})", pair.Word1.Meaning.Category);
        }
        writer.WriteLine();

        writer.Write(alignment.ToString(pair.AlignmentNotes));
        writer.WriteLine("Similarity: {0:p}", pair.PhoneticSimilarityScore);

        separatorNeeded = true;
    }
}
/// <summary>
/// Writes a plain-text report for <paramref name="varietyPair"/> to <paramref name="stream"/>:
/// the lexical and phonetic similarity, the word pairs flagged as cognates, the remaining word
/// pairs, and for every condition of the cognate sound correspondence probability distribution
/// its observed correspondences with probability and frequency, ordered by descending
/// probability.
/// </summary>
/// <remarks>The stream is wrapped in a <c>NonClosingStreamWrapper</c> before the writer is created.</remarks>
public void Export(Stream stream, IWordAligner aligner, VarietyPair varietyPair)
{
    using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
    {
        writer.WriteLine("Similarity");
        writer.WriteLine("----------");
        writer.WriteLine("Lexical: {0:p}", varietyPair.LexicalSimilarityScore);
        writer.WriteLine("Phonetic: {0:p}", varietyPair.PhoneticSimilarityScore);
        writer.WriteLine();

        writer.WriteLine("Likely cognates");
        writer.WriteLine("--------------");
        WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(wp => wp.Cognacy));
        writer.WriteLine();

        writer.WriteLine("Likely non-cognates");
        writer.WriteLine("-------------------");
        WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(wp => !wp.Cognacy));
        writer.WriteLine();

        writer.WriteLine("Sound correspondences");
        writer.WriteLine("---------------------");

        bool separatorNeeded = false;
        foreach (SoundContext lhs in varietyPair.CognateSoundCorrespondenceProbabilityDistribution.Conditions)
        {
            if (separatorNeeded)
            {
                writer.WriteLine();
            }

            IProbabilityDistribution<Ngram<Segment>> probDist = varietyPair.CognateSoundCorrespondenceProbabilityDistribution[lhs];
            FrequencyDistribution<Ngram<Segment>> freqDist = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[lhs];

            writer.WriteLine(lhs.ToString());

            var rows = freqDist.ObservedSamples
                .Select(corr => new { Segment = corr, Probability = probDist[corr], Frequency = freqDist[corr] })
                .OrderByDescending(row => row.Probability);

            foreach (var row in rows)
            {
                writer.WriteLine("{0}: {1:p}, {2}", row.Segment, row.Probability, row.Frequency);
            }

            separatorNeeded = true;
        }
    }
}
/// <summary>
/// Re-aligns every word pair of <paramref name="pair"/>, lets the configured cognate
/// identifier update the predicted cognacy, and recomputes the cognate sound correspondence
/// counts, the cognate count, each pair's phonetic similarity score, and the variety pair's
/// lexical and phonetic similarity scores.
/// </summary>
/// <remarks>
/// Lexical similarity is the share of cognate pairs and phonetic similarity the mean
/// normalized alignment score; both are 0 when there are no word pairs.
/// </remarks>
private void E(VarietyPair pair)
{
    ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[CognateIdentifierId];
    var cognateCorrCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    IWordAligner aligner = _project.WordAligners[AlignerId];

    int cognateCount = 0;
    double scoreSum = 0;

    foreach (WordPair wordPair in pair.WordPairs)
    {
        IWordAlignerResult result = aligner.Compute(wordPair);
        cognateIdentifier.UpdatePredictedCognacy(wordPair, result);
        Alignment<Word, ShapeNode> alignment = result.GetAlignments().First();

        if (wordPair.Cognacy)
        {
            int column = 0;
            while (column < alignment.ColumnCount)
            {
                SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
                Ngram<Segment> correspondence = alignment[1, column].ToNgram(_segmentPool);
                cognateCorrCounts[context].Increment(correspondence);
                column++;
            }

            cognateCount++;
        }

        wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
        scoreSum += wordPair.PhoneticSimilarityScore;
    }

    pair.CognateCount = cognateCount;
    pair.CognateSoundCorrespondenceFrequencyDistribution = cognateCorrCounts;

    if (pair.WordPairs.Count != 0)
    {
        pair.LexicalSimilarityScore = (double)cognateCount / pair.WordPairs.Count;
        pair.PhoneticSimilarityScore = scoreSum / pair.WordPairs.Count;
    }
    else
    {
        // Avoid dividing by zero when the variety pair has no word pairs.
        pair.LexicalSimilarityScore = 0;
        pair.PhoneticSimilarityScore = 0;
    }
}
/// <summary>
/// Reports whether any segment of <paramref name="target1"/> and any segment of
/// <paramref name="target2"/> have an aligner delta that does not exceed <c>_threshold</c>.
/// </summary>
/// <remarks>
/// Returns <c>false</c> without consulting the aligner when the threshold is 0 or either
/// n-gram is empty. The node parameters are not used by this implementation.
/// </remarks>
public bool IsMapped(ShapeNode leftNode1, Ngram<Segment> target1, ShapeNode rightNode1, ShapeNode leftNode2, Ngram<Segment> target2, ShapeNode rightNode2)
{
    if (_threshold == 0)
    {
        return false;
    }
    if (target1.Length == 0)
    {
        return false;
    }
    if (target2.Length == 0)
    {
        return false;
    }

    IWordAligner aligner = _project.WordAligners[_alignerID];

    foreach (Segment first in target1)
    {
        foreach (Segment second in target2)
        {
            bool withinThreshold = aligner.Delta(first.FeatureStruct, second.FeatureStruct) <= _threshold;
            if (withinThreshold)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Builds the view model for a variety pair: stores the collaborators, creates one word-pairs
/// view model for the pairs predicted to be cognates and one for the remaining pairs, and
/// creates a sound change view model for every (condition, sample) combination of the sound
/// change probability distribution.
/// </summary>
public VarietyPairViewModel(SegmentPool segmentPool, IProjectService projectService, WordPairsViewModel.Factory wordPairsFactory, VarietyPair varietyPair, bool areVarietiesInOrder)
{
    _segmentPool = segmentPool;
    _projectService = projectService;
    _varietyPair = varietyPair;
    _areVarietiesInOrder = areVarietiesInOrder;

    IWordAligner primaryAligner = projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

    _cognates = wordPairsFactory();
    foreach (WordPair predictedCognate in _varietyPair.WordPairs.Where(wp => wp.AreCognatePredicted))
    {
        var cognateVm = new WordPairViewModel(primaryAligner, predictedCognate, _areVarietiesInOrder);
        _cognates.WordPairs.Add(cognateVm);
    }

    _noncognates = wordPairsFactory();
    foreach (WordPair predictedNoncognate in _varietyPair.WordPairs.Where(wp => !wp.AreCognatePredicted))
    {
        var noncognateVm = new WordPairViewModel(primaryAligner, predictedNoncognate, _areVarietiesInOrder);
        _noncognates.WordPairs.Add(noncognateVm);
    }

    var soundChanges =
        from lhs in _varietyPair.SoundChangeProbabilityDistribution.Conditions
        from segment in _varietyPair.SoundChangeProbabilityDistribution[lhs].Samples
        select new SoundChangeViewModel(
            lhs,
            segment,
            _varietyPair.SoundChangeProbabilityDistribution[lhs][segment],
            _varietyPair.SoundChangeFrequencyDistribution[lhs][segment]);

    _soundChanges = new ReadOnlyList<SoundChangeViewModel>(soundChanges.ToList());
}
/// <summary>
/// Writes one entry per word pair, in descending order of phonetic similarity score: the gloss
/// (followed by the category in parentheses when it is not null or empty), the alignment text
/// including the pair's alignment notes, and the similarity formatted as a percentage. A blank
/// line is written between consecutive entries.
/// </summary>
private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
{
    int written = 0;

    foreach (WordPair pair in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
    {
        // Separate this entry from the previous one.
        if (written > 0)
        {
            writer.WriteLine();
        }

        IWordAlignerResult alignerResult = aligner.Compute(pair);
        Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

        writer.Write(pair.Word1.Meaning.Gloss);
        if (!string.IsNullOrEmpty(pair.Word1.Meaning.Category))
        {
            writer.Write(" ({0})", pair.Word1.Meaning.Category);
        }
        writer.WriteLine();

        writer.Write(alignment.ToString(pair.AlignmentNotes));
        writer.WriteLine("Similarity: {0:p}", pair.PhoneticSimilarityScore);

        written++;
    }
}
/// <summary>
/// Exports a text report about <paramref name="varietyPair"/> to <paramref name="stream"/>.
/// The report lists the lexical and phonetic similarity, the cognate and non-cognate word
/// pairs, and the sound correspondences observed for each condition of the cognate sound
/// correspondence probability distribution, sorted by descending probability.
/// </summary>
/// <remarks>The stream is wrapped in a <c>NonClosingStreamWrapper</c> before the writer is created.</remarks>
public void Export(Stream stream, IWordAligner aligner, VarietyPair varietyPair)
{
    using (var writer = new StreamWriter(new NonClosingStreamWrapper(stream)))
    {
        writer.WriteLine("Similarity");
        writer.WriteLine("----------");
        writer.WriteLine("Lexical: {0:p}", varietyPair.LexicalSimilarityScore);
        writer.WriteLine("Phonetic: {0:p}", varietyPair.PhoneticSimilarityScore);
        writer.WriteLine();

        writer.WriteLine("Likely cognates");
        writer.WriteLine("--------------");
        WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(wp => wp.Cognacy));
        writer.WriteLine();

        writer.WriteLine("Likely non-cognates");
        writer.WriteLine("-------------------");
        WriteWordPairs(writer, aligner, varietyPair.WordPairs.Where(wp => !wp.Cognacy));
        writer.WriteLine();

        writer.WriteLine("Sound correspondences");
        writer.WriteLine("---------------------");

        int conditionsWritten = 0;
        foreach (SoundContext lhs in varietyPair.CognateSoundCorrespondenceProbabilityDistribution.Conditions)
        {
            // Blank line between consecutive conditions.
            if (conditionsWritten > 0)
            {
                writer.WriteLine();
            }

            IProbabilityDistribution<Ngram<Segment>> probDist = varietyPair.CognateSoundCorrespondenceProbabilityDistribution[lhs];
            FrequencyDistribution<Ngram<Segment>> freqDist = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[lhs];
            writer.WriteLine(lhs.ToString());

            var ranked = freqDist.ObservedSamples
                .Select(corr => new { Segment = corr, Probability = probDist[corr], Frequency = freqDist[corr] })
                .OrderByDescending(entry => entry.Probability);

            foreach (var entry in ranked)
            {
                writer.WriteLine("{0}: {1:p}, {2}", entry.Segment, entry.Probability, entry.Frequency);
            }

            conditionsWritten++;
        }
    }
}
/// <summary>
/// Builds the segment mappings table for one sound type. The table rows/columns are the
/// distinct segments of that type observed in the project's varieties (ordered by category,
/// then by segment) plus one entry for a null segment. For every ordered pair of table
/// segments a segment-pair view model is created with the aligner delta, an enabled flag, a
/// meets-threshold flag, and the existing mappings whose normalized segments match the pair.
/// </summary>
/// <exception cref="InvalidEnumArgumentException">
/// <paramref name="soundType"/> is neither <c>Consonant</c> nor <c>Vowel</c>.
/// </exception>
public SegmentMappingsTableViewModel(IProjectService projectService, SegmentMappingsTableSegmentPairViewModel.Factory segmentPairFactory, SegmentMappingViewModel.Factory mappingFactory, IEnumerable<SegmentMappingViewModel> mappings, SoundType soundType, int threshold)
{
    _threshold = threshold;
    _soundType = soundType;

    FeatureSymbol segmentType;
    if (_soundType == SoundType.Consonant)
    {
        segmentType = CogFeatureSystem.ConsonantType;
    }
    else if (_soundType == SoundType.Vowel)
    {
        segmentType = CogFeatureSystem.VowelType;
    }
    else
    {
        throw new InvalidEnumArgumentException();
    }

    var segmentComparer = new SegmentComparer();
    var categoryComparer = new SegmentCategoryComparer();

    var orderedSegments = projectService.Project.Varieties
        .SelectMany(v => v.SegmentFrequencyDistribution.ObservedSamples)
        .Where(s => s.Type == segmentType)
        .Distinct()
        .OrderBy(s => s.Category(), categoryComparer)
        .ThenBy(s => s, segmentComparer);

    // The trailing entry with a null segment is appended after all real segments.
    _segments = new ReadOnlyList<SegmentMappingsTableSegmentViewModel>(
        orderedSegments
            .Select(s => new SegmentMappingsTableSegmentViewModel(s, _soundType))
            .Concat(new SegmentMappingsTableSegmentViewModel(null, _soundType))
            .ToArray());

    var segmentsByCategory = _segments
        .GroupBy(s => s.DomainSegment == null ? string.Empty : s.DomainSegment.Category())
        .OrderBy(g => g.Key, categoryComparer);

    _categories = new ReadOnlyList<SegmentCategoryViewModel>(
        segmentsByCategory.Select(g => new SegmentCategoryViewModel(g.Key, g)).ToArray());

    // Index the existing mappings by their normalized segment pair.
    var mappingLookup = new Dictionary<UnorderedTuple<string, string>, HashSet<UnorderedTuple<string, string>>>();
    foreach (SegmentMappingViewModel mapping in mappings)
    {
        string seg1, seg2;
        FeatureSymbol leftEnv1, rightEnv1, leftEnv2, rightEnv2;

        if (!ListSegmentMappings.Normalize(projectService.Project.Segmenter, mapping.Segment1, out seg1, out leftEnv1, out rightEnv1))
        {
            continue;
        }
        if (!ListSegmentMappings.Normalize(projectService.Project.Segmenter, mapping.Segment2, out seg2, out leftEnv2, out rightEnv2))
        {
            continue;
        }

        UnorderedTuple<string, string> key = UnorderedTuple.Create(seg1, seg2);
        HashSet<UnorderedTuple<string, string>> originals = mappingLookup.GetOrCreate(key, () => new HashSet<UnorderedTuple<string, string>>());
        originals.Add(UnorderedTuple.Create(mapping.Segment1, mapping.Segment2));
    }

    IWordAligner aligner = projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];

    foreach (SegmentMappingsTableSegmentViewModel rowSegment in _segments)
    {
        // Once the column equal to the row segment is reached, this and all later pairs are disabled.
        bool isEnabled = true;

        foreach (SegmentMappingsTableSegmentViewModel columnSegment in _segments)
        {
            if (EqualityComparer<Segment>.Default.Equals(rowSegment.DomainSegment, columnSegment.DomainSegment))
            {
                isEnabled = false;
            }

            // -1 marks pairs that involve the null segment.
            int delta;
            if (rowSegment.DomainSegment == null || columnSegment.DomainSegment == null)
            {
                delta = -1;
            }
            else
            {
                delta = aligner.Delta(rowSegment.DomainSegment.FeatureStruct, columnSegment.DomainSegment.FeatureStruct);
            }

            SegmentMappingsTableSegmentPairViewModel segmentPair = segmentPairFactory(rowSegment, columnSegment, delta, isEnabled);
            segmentPair.MeetsThreshold = delta != -1 && delta <= _threshold;

            HashSet<UnorderedTuple<string, string>> pairMappings;
            if (mappingLookup.TryGetValue(UnorderedTuple.Create(rowSegment.StrRep, columnSegment.StrRep), out pairMappings))
            {
                segmentPair.Mappings.Mappings.AddRange(pairMappings.Select(m => mappingFactory(m.Item1, m.Item2)));
            }

            rowSegment.SegmentPairs.Add(segmentPair);
        }
    }
}
/// <summary>
/// Stores the supplied word aligner in the <c>_wordAligner</c> field.
/// </summary>
/// <param name="wordAligner">The aligner to keep a reference to.</param>
protected WordAlignerResultBase(IWordAligner wordAligner)
{
    this._wordAligner = wordAligner;
}
// Rebuilds the word pairs of a variety pair and accumulates the cognate sound correspondence
// counts, deferring meanings that have more than one candidate word pair ("ambiguous"
// meanings) to a second pass.
//
// Pass 1 - for every meaning of Variety1, considering only words whose Shape is non-empty:
//   * Exactly one word on each side: the pair is added, UpdateActualCognacy is applied, the
//     pair is aligned, the threshold identifier updates its predicted cognacy, and - if the
//     pair's Cognacy flag is set - its alignment is counted. PhoneticSimilarityScore is set to
//     the alignment's NormalizedScore.
//   * Otherwise, when both sides have at least one word: every word1 x word2 combination is
//     aligned, the result with the highest BestRawScore is added as a provisional pair (only
//     the threshold identifier is applied to it here), and the meaning is recorded in
//     ambiguousMeanings together with that best result and all candidate results.
//
// Pass 2 - for every ambiguous meaning i, in recorded order:
//   * Start from a clone of the counts accumulated so far and add the best-result alignments
//     of the later ambiguous meanings (j > i) whose current pair has Cognacy set.
//   * For each candidate result of meaning i: replace the meaning's pair in
//     varietyPair.WordPairs with the candidate pair, apply the threshold identifier, count the
//     candidate's alignment in a further clone when Cognacy is set, and write the resulting
//     count and distribution to varietyPair before calling the configured cognate identifier
//     (presumably so that identifier can consult them - confirm). The candidate's
//     PhoneticSimilarityScore is set, and the candidate is kept when Compare ranks it above
//     the best one so far.
//   * After all candidates: the meaning's pair is replaced by the best pair, UpdateActualCognacy
//     is applied, and its alignment is added to the running counts when Cognacy is set.
//
// Finally the running cognate count and counts are stored on varietyPair, overwriting the
// temporary values written during pass 2.
public void Process(VarietyPair varietyPair) { IWordAligner aligner = _project.WordAligners[_alignerID]; var ambiguousMeanings = new List <Tuple <Meaning, IWordAlignerResult, IWordAlignerResult[]> >(); varietyPair.WordPairs.Clear(); var cognateCorrCounts = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >(); int cognateCount = 0; foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings) { Word[] words1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray(); Word[] words2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray(); if (words1.Length == 1 && words2.Length == 1) { Word word1 = words1.Single(); Word word2 = words2.Single(); WordPair wp = varietyPair.WordPairs.Add(word1, word2); _project.CognacyDecisions.UpdateActualCognacy(wp); IWordAlignerResult alignerResult = aligner.Compute(wp); _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, alignerResult); Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First(); if (wp.Cognacy) { UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, alignment); cognateCount++; } wp.PhoneticSimilarityScore = alignment.NormalizedScore; } else if (words1.Length > 0 && words2.Length > 0) { IWordAlignerResult[] alignerResults = words1.SelectMany(w1 => words2.Select(w2 => aligner.Compute(w1, w2))).ToArray(); IWordAlignerResult maxAlignerResult = alignerResults.MaxBy(a => a.BestRawScore); ambiguousMeanings.Add(Tuple.Create(meaning, maxAlignerResult, alignerResults)); WordPair wp = varietyPair.WordPairs.Add(maxAlignerResult.Words[0], maxAlignerResult.Words[1]); _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, maxAlignerResult); } } ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[_cognateIdentifierID]; for (int i = 0; i < ambiguousMeanings.Count; i++) { ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > newCognateCorrCounts = cognateCorrCounts.Clone(); int newCognateCount = cognateCount; for (int 
j = i + 1; j < ambiguousMeanings.Count; j++) { if (varietyPair.WordPairs[ambiguousMeanings[j].Item1].Cognacy) { UpdateCognateCorrespondenceCounts(aligner, newCognateCorrCounts, ambiguousMeanings[j].Item2.GetAlignments().First()); newCognateCount++; } } IWordAlignerResult bestAlignerResult = null; WordPair bestWordPair = null; foreach (IWordAlignerResult alignerResult in ambiguousMeanings[i].Item3) { ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > alignmentCognateCorrCounts = newCognateCorrCounts.Clone(); int alignmentCognateCount = newCognateCount; Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First(); varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1); WordPair wordPair = varietyPair.WordPairs.Add(alignerResult.Words[0], alignerResult.Words[1]); _thresholdCognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult); if (wordPair.Cognacy) { UpdateCognateCorrespondenceCounts(aligner, alignmentCognateCorrCounts, alignment); alignmentCognateCount++; } varietyPair.CognateCount = alignmentCognateCount; varietyPair.CognateSoundCorrespondenceFrequencyDistribution = alignmentCognateCorrCounts; cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult); wordPair.PhoneticSimilarityScore = alignment.NormalizedScore; if (bestWordPair == null || Compare(wordPair, bestWordPair) > 0) { bestWordPair = wordPair; bestAlignerResult = alignerResult; } } Debug.Assert(bestWordPair != null); varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1); varietyPair.WordPairs.Add(bestWordPair); _project.CognacyDecisions.UpdateActualCognacy(bestWordPair); if (bestWordPair.Cognacy) { UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, bestAlignerResult.GetAlignments().First()); cognateCount++; } } varietyPair.CognateCount = cognateCount; varietyPair.CognateSoundCorrespondenceFrequencyDistribution = cognateCorrCounts; }
/// <summary>
/// Rebuilds the word pairs of <paramref name="varietyPair"/> and recomputes its cognate count and
/// cognate sound correspondence frequency distribution.
/// </summary>
/// <remarks>
/// For each meaning of the first variety, only words with a non-empty shape are considered. When
/// several combinations are possible, the combination whose alignment has the highest
/// <c>BestRawScore</c> is used (the first one found wins ties).
/// </remarks>
/// <param name="varietyPair">The variety pair whose word pairs and cognate statistics are rebuilt in place.</param>
public void Process(VarietyPair varietyPair)
{
    IWordAligner aligner = _project.WordAligners[_alignerID];
    varietyPair.WordPairs.Clear();
    var corrCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    int cognateTotal = 0;

    foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
    {
        Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        if (candidates1.Length == 0 || candidates2.Length == 0)
        {
            // Nothing to pair for this meaning.
            continue;
        }

        WordPair pair;
        IWordAlignerResult result;
        if (candidates1.Length == 1 && candidates2.Length == 1)
        {
            // Unambiguous: register the pair first, then align it.
            pair = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
            _project.CognacyDecisions.UpdateActualCognacy(pair);
            result = aligner.Compute(pair);
        }
        else
        {
            // Ambiguous: align every combination and keep the one with the highest raw score.
            WordPair topPair = null;
            IWordAlignerResult topResult = null;
            foreach (Word w1 in candidates1)
            {
                foreach (Word w2 in candidates2)
                {
                    IWordAlignerResult trial = aligner.Compute(w1, w2);
                    bool isBetter = topResult == null || trial.BestRawScore > topResult.BestRawScore;
                    if (isBetter)
                    {
                        topPair = new WordPair(w1, w2);
                        topResult = trial;
                    }
                }
            }

            Debug.Assert(topPair != null);
            varietyPair.WordPairs.Add(topPair);
            _project.CognacyDecisions.UpdateActualCognacy(topPair);
            pair = topPair;
            result = topResult;
        }

        // Common tail: predicted cognacy, correspondence counts, similarity score.
        _thresholdCognateIdentifier.UpdatePredictedCognacy(pair, result);
        Alignment<Word, ShapeNode> alignment = result.GetAlignments().First();
        if (pair.Cognacy)
        {
            UpdateCognateCorrespondenceCounts(aligner, corrCounts, alignment);
            cognateTotal++;
        }
        pair.PhoneticSimilarityScore = alignment.NormalizedScore;
    }

    varietyPair.CognateCount = cognateTotal;
    varietyPair.CognateSoundCorrespondenceFrequencyDistribution = corrCounts;
}
/// <summary>
/// Rebuilds the multiple word alignment rows in <c>_words</c> for the currently selected meaning.
/// </summary>
/// <remarks>
/// Does nothing when no meaning is selected. The words to align are collected from the word pairs
/// that every variety pair of the project holds for the selected meaning. When no words are found
/// the row collection is simply cleared. A single word is wrapped in a one-sequence alignment
/// without invoking the aligner; otherwise the primary word aligner computes the alignment.
/// </remarks>
private void AlignWords()
{
    if (_selectedMeaning == null)
    {
        return;
    }

    _busyService.ShowBusyIndicatorUntilFinishDrawing();

    // Collect the distinct words that take part in a word pair for the selected meaning.
    var words = new HashSet<Word>();
    foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
    {
        WordPair wp;
        if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
        {
            words.Add(wp.Word1);
            words.Add(wp.Word2);
        }
    }

    if (words.Count == 0)
    {
        _words.Clear();
        return;
    }

    IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    Alignment<Word, ShapeNode> alignment;
    if (words.Count == 1)
    {
        // Only one word: build a one-sequence alignment directly from its prefix, stem and suffix.
        Word word = words.First();
        Annotation<ShapeNode> prefixAnn = word.Prefix;
        var prefix = new AlignmentCell<ShapeNode>(prefixAnn != null
            ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        IEnumerable<AlignmentCell<ShapeNode>> columns = word.Shape.GetNodes(word.Stem.Span)
            .Where(NodeFilter)
            .Select(n => new AlignmentCell<ShapeNode>(n));
        Annotation<ShapeNode> suffixAnn = word.Suffix;
        var suffix = new AlignmentCell<ShapeNode>(suffixAnn != null
            ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        alignment = new Alignment<Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
    }
    else
    {
        IWordAlignerResult result = aligner.Compute(words);
        alignment = result.GetAlignments().First();
    }

    // Ordered by Noise, then by descending member count.
    // NOTE(review): presumably this puts the noise cluster last - confirm against Cluster.Noise.
    List<Cluster<Word>> cognateSets = _projectService.Project
        .GenerateCognateSets(_selectedMeaning.DomainMeaning)
        .OrderBy(c => c.Noise)
        .ThenByDescending(c => c.DataObjects.Count)
        .ToList();

    ColumnCount = alignment.ColumnCount;
    using (_words.BulkUpdate())
    {
        _words.Clear();
        for (int i = 0; i < alignment.SequenceCount; i++)
        {
            AlignmentCell<ShapeNode> prefix = alignment.Prefixes[i];
            Word word = alignment.Sequences[i];
            // Materialize the row now. The lambda captures the loop variable i, which is shared by
            // all iterations; a deferred query enumerated later would read another row or index
            // past the last sequence.
            IEnumerable<AlignmentCell<ShapeNode>> columns = Enumerable.Range(0, alignment.ColumnCount)
                .Select(col => alignment[i, col])
                .ToArray();
            AlignmentCell<ShapeNode> suffix = alignment.Suffixes[i];
            int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
            // Sets are numbered from 1 in list order, except the last set in the ordering, which
            // is given int.MaxValue. A word found in no set gets 0 (FindIndex returns -1).
            _words.Add(new MultipleWordAlignmentWordViewModel(word, prefix, columns, suffix,
                cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1));
        }
    }
}