/// <summary>
/// Builds the view model for a word pair, aligning the pair with the project's primary word aligner
/// and wiring up the "show in multiple word alignment" and "pin/unpin" commands.
/// </summary>
/// <param name="projectService">Service exposing the current project and its word aligners.</param>
/// <param name="analysisService">Analysis service stored for later use by this view model.</param>
/// <param name="wordPair">The word pair being presented.</param>
/// <param name="areVarietiesInOrder">Stored flag describing the ordering of the two varieties.</param>
public WordPairViewModel(IProjectService projectService, IAnalysisService analysisService, WordPair wordPair, bool areVarietiesInOrder)
{
    _projectService = projectService;
    _analysisService = analysisService;
    _wordPair = wordPair;
    _areVarietiesInOrder = areVarietiesInOrder;

    _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
    _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
    _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

    // Only the first alignment produced by the primary aligner is displayed.
    IWordAligner primaryAligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    _alignment = primaryAligner.Compute(_wordPair).GetAlignments().First();

    _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

    var columnNodes = new List<AlignedNodeViewModel>();
    for (int column = 0; column < _alignment.ColumnCount; column++)
    {
        // A column without a stored alignment note gets a null note.
        string note = column < _wordPair.AlignmentNotes.Count ? _wordPair.AlignmentNotes[column] : null;
        columnNodes.Add(new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note));
    }

    _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
    _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(columnNodes);

    _showInMultipleWordAlignmentCommand = new RelayCommand(ShowInMultipleWordAlignment);
    _pinUnpinCommand = new RelayCommand(PinUnpin);
}
/// <summary>
/// Builds the view model for a word pair using an explicitly supplied aligner.
/// No commands are created by this overload.
/// </summary>
/// <param name="aligner">The aligner used to compute the displayed alignment.</param>
/// <param name="wordPair">The word pair being presented.</param>
/// <param name="areVarietiesInOrder">Stored flag describing the ordering of the two varieties.</param>
public WordPairViewModel(IWordAligner aligner, WordPair wordPair, bool areVarietiesInOrder)
{
    _wordPair = wordPair;
    _areVarietiesInOrder = areVarietiesInOrder;

    _meaning = new MeaningViewModel(_wordPair.Word1.Meaning);
    _variety1 = new VarietyViewModel(_wordPair.VarietyPair.Variety1);
    _variety2 = new VarietyViewModel(_wordPair.VarietyPair.Variety2);

    // Only the first alignment returned by the aligner is shown.
    IWordAlignerResult alignerResult = aligner.Compute(_wordPair);
    _alignment = alignerResult.GetAlignments().First();

    _prefixNode = new AlignedNodeViewModel(_alignment.Prefixes[0], _alignment.Prefixes[1]);

    var columnNodes = new List<AlignedNodeViewModel>();
    for (int column = 0; column < _alignment.ColumnCount; column++)
    {
        string note = null;
        bool hasNote = column < _wordPair.AlignmentNotes.Count;
        if (hasNote)
        {
            note = _wordPair.AlignmentNotes[column];
        }

        var node = new AlignedNodeViewModel(column, _alignment[0, column], _alignment[1, column], note);
        columnNodes.Add(node);
    }

    _suffixNode = new AlignedNodeViewModel(_alignment.Suffixes[0], _alignment.Suffixes[1]);
    _alignedNodes = new ReadOnlyCollection<AlignedNodeViewModel>(columnNodes);
}
/// <summary>
/// Decides whether the correspondence at <paramref name="column"/> (first word's sound context
/// mapped to <paramref name="v"/>) occurs often enough among the variety pair's cognates to count as regular.
/// </summary>
/// <param name="wordPair">Word pair whose variety pair supplies the correspondence counts.</param>
/// <param name="alignerResult">Aligner result; its aligner supplies the contextual sound classes.</param>
/// <param name="alignment">The alignment being inspected.</param>
/// <param name="column">Index of the alignment column.</param>
/// <param name="v">The n-gram of the second word in that column.</param>
/// <returns><c>true</c> when the observed count of <paramref name="v"/> reaches the applicable threshold.</returns>
private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
{
    VarietyPair varietyPair = wordPair.VarietyPair;
    SoundContext lhs = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
    FrequencyDistribution<Ngram<Segment>> observed = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[lhs];

    if (!AutomaticRegularCorrespondenceThreshold)
    {
        int fixedThreshold = DefaultRegularCorrespondenceThreshold;
        return observed[v] >= fixedThreshold;
    }

    // Total occurrences of v across every context sharing this context's left and right environments.
    int sameEnvironmentCount = varietyPair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
        .Where(sc => sc.LeftEnvironment == lhs.LeftEnvironment && sc.RightEnvironment == lhs.RightEnvironment)
        .Sum(sc => varietyPair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

    int threshold;
    bool inTable = _regularCorrespondenceThresholdTable.TryGetThreshold(
        varietyPair.CognateCount, observed.SampleOutcomeCount, sameEnvironmentCount, out threshold);
    if (!inTable)
    {
        // The table has no entry for these counts; fall back to the default threshold.
        threshold = DefaultRegularCorrespondenceThreshold;
    }

    return observed[v] >= threshold;
}
/// <summary>
/// Annotates each column of the first alignment and derives the predicted cognate flag and score
/// from the run of matching non-vowel columns that precedes the first mismatch.
/// </summary>
/// <remarks>
/// Notes written per column: "X" when either cell starts with a vowel, "1" when the cells have the same
/// string representation or the same matching sound class, and "0" otherwise.
/// </remarks>
/// <param name="wordPair">The word pair to update; its alignment notes are cleared first.</param>
/// <param name="alignerResult">The aligner result whose first alignment is examined.</param>
public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

    int leadingMatches = 0;
    bool sawMismatch = false;
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        AlignmentCell<ShapeNode> cellA = alignment[0, column];
        AlignmentCell<ShapeNode> cellB = alignment[1, column];

        bool vowelInA = cellA.Count > 0 && cellA[0].Type() == CogFeatureSystem.VowelType;
        if (vowelInA || (cellB.Count > 0 && cellB[0].Type() == CogFeatureSystem.VowelType))
        {
            wordPair.AlignmentNotes.Add("X");
            continue;
        }

        bool equivalent = cellA.StrRep() == cellB.StrRep();
        if (!equivalent)
        {
            // Both lookups always run; a failed lookup is treated as "no sound class".
            SoundClass foundA;
            SoundClass classA = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 0, column, out foundA) ? foundA : null;
            SoundClass foundB;
            SoundClass classB = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 1, column, out foundB) ? foundB : null;
            equivalent = classA != null && classB != null && classA == classB;
        }

        if (equivalent)
        {
            wordPair.AlignmentNotes.Add("1");
            if (!sawMismatch)
            {
                leadingMatches++;
            }
        }
        else
        {
            wordPair.AlignmentNotes.Add("0");
            sawMismatch = true;
        }
    }

    wordPair.AreCognatePredicted = !sawMismatch || leadingMatches >= _initialEquivalenceThreshold;
    wordPair.CognicityScore = (double)leadingMatches / alignment.ColumnCount;
}
/// <summary>
/// Sets the cognicity score to the mean normalized score of all alignments in the result and
/// predicts cognates when that mean reaches the configured threshold.
/// </summary>
/// <param name="wordPair">The word pair whose score and prediction are updated.</param>
/// <param name="alignerResult">The aligner result supplying the alignments.</param>
public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
{
    double scoreSum = 0.0;
    int seen = 0;
    foreach (double normalizedScore in alignerResult.GetAlignments().Select(a => a.NormalizedScore))
    {
        scoreSum += normalizedScore;
        seen++;
    }

    wordPair.CognicityScore = scoreSum / seen;
    wordPair.AreCognatePredicted = wordPair.CognicityScore >= _threshold;
}
/// <summary>
/// Reads lines containing two space-separated words, aligns each pair with the project's
/// "primary" word aligner and writes the words followed by the raw or normalized score.
/// </summary>
/// <param name="inputReader">Source of the input lines.</param>
/// <param name="outputWriter">Destination for the score lines (and the alignment when verbose).</param>
/// <param name="errorWriter">Not used directly by this method.</param>
/// <returns>Always <c>ReturnCode.Okay</c>; per-line problems are recorded in <c>Errors</c>.</returns>
protected override ReturnCode DoWork(TextReader inputReader, TextWriter outputWriter, TextWriter errorWriter)
{
    // Exactly one score kind must be selected; any other combination falls back to normalized scores.
    if (RawScores == NormalizedScores)
    {
        Warnings.Add(RawScores
            ? "Please specify either raw or normalized scores, but not both. Defaulting to normalized."
            : "Neither raw scores nor normalized scores were selected. Defaulting to normalized.");
        RawScores = false;
        NormalizedScores = true;
    }

    SetupProject();
    Meaning meaning = MeaningFactory.Create();
    IWordAligner wordAligner = Project.WordAligners["primary"];

    foreach (string line in ReadLines(inputReader))
    {
        string[] wordTexts = line.Split(' ');
        if (wordTexts.Length != 2)
        {
            Errors.Add(line, "Each line should have two space-separated words in it.");
            continue;
        }

        Word[] words = wordTexts.Select(wordText => ParseWordOnce(wordText, meaning, Project)).ToArray();
        bool allParsed = words.Length == 2 && !words.Any(w => w == null);
        if (!allParsed)
        {
            Errors.Add(line, "One or more of this line's words failed to parse. Successfully parsed words: {0}", string.Join(", ", words.Where(w => w != null).Select(w => w.StrRep)));
            continue;
        }

        Word left = words[0];
        Word right = words[1];
        Alignment<Word, ShapeNode> alignment = wordAligner.Compute(left, right).GetAlignments().First();
        outputWriter.WriteLine("{0} {1} {2}", left.StrRep, right.StrRep, RawScores ? alignment.RawScore : alignment.NormalizedScore);

        if (Verbose)
        {
            outputWriter.Write(alignment.ToString(Enumerable.Empty<string>()));
            outputWriter.WriteLine();
        }
    }

    return ReturnCode.Okay;
}
/// <summary>
/// Annotates each column of the first alignment and sets the predicted cognacy flag and score
/// from the number of matching non-vowel columns seen before the first mismatch.
/// </summary>
/// <remarks>
/// Column notes: "X" for a column where either cell starts with a vowel, "1" for identical cells or
/// cells sharing a matching sound class, "0" for anything else.
/// </remarks>
/// <param name="wordPair">The word pair to update; its alignment notes are cleared first.</param>
/// <param name="alignerResult">The aligner result whose first alignment is examined.</param>
public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

    int matchesBeforeMismatch = 0;
    bool mismatchSeen = false;
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        AlignmentCell<ShapeNode> top = alignment[0, column];
        AlignmentCell<ShapeNode> bottom = alignment[1, column];

        if ((top.Count > 0 && top[0].Type() == CogFeatureSystem.VowelType)
            || (bottom.Count > 0 && bottom[0].Type() == CogFeatureSystem.VowelType))
        {
            wordPair.AlignmentNotes.Add("X");
            continue;
        }

        bool matches;
        if (top.StrRep() == bottom.StrRep())
        {
            matches = true;
        }
        else
        {
            // Look up a sound class for each side; a failed lookup counts as no class at all.
            SoundClass topClass;
            if (!_soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 0, column, out topClass))
            {
                topClass = null;
            }

            SoundClass bottomClass;
            if (!_soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 1, column, out bottomClass))
            {
                bottomClass = null;
            }

            matches = topClass != null && bottomClass != null && topClass == bottomClass;
        }

        if (!matches)
        {
            wordPair.AlignmentNotes.Add("0");
            mismatchSeen = true;
            continue;
        }

        wordPair.AlignmentNotes.Add("1");
        if (!mismatchSeen)
        {
            matchesBeforeMismatch++;
        }
    }

    wordPair.PredictedCognacy = !mismatchSeen || matchesBeforeMismatch >= _initialEquivalenceThreshold;
    wordPair.PredictedCognacyScore = (double)matchesBeforeMismatch / alignment.ColumnCount;
}
/// <summary>
/// Re-aligns every word pair of the variety pair, refreshes predicted cognacy, and recomputes the
/// cognate sound correspondence counts together with the lexical and phonetic similarity scores.
/// </summary>
/// <param name="pair">The variety pair whose word pairs and aggregate statistics are updated.</param>
private void E(VarietyPair pair)
{
    ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[CognateIdentifierId];
    var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    IWordAligner aligner = _project.WordAligners[AlignerId];

    int cognates = 0;
    double phoneticScoreSum = 0;
    foreach (WordPair wordPair in pair.WordPairs)
    {
        IWordAlignerResult alignerResult = aligner.Compute(wordPair);
        cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
        Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

        if (wordPair.Cognacy)
        {
            // Count, per column, which second-word n-gram corresponds to the first word's sound context.
            for (int column = 0; column < alignment.ColumnCount; column++)
            {
                SoundContext lhs = alignment.ToSoundContext(_segmentPool, 0, column, aligner.ContextualSoundClasses);
                Ngram<Segment> rhs = alignment[1, column].ToNgram(_segmentPool);
                correspondenceCounts[lhs].Increment(rhs);
            }

            cognates++;
        }

        wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
        phoneticScoreSum += wordPair.PhoneticSimilarityScore;
    }

    pair.CognateCount = cognates;
    pair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;

    // With no word pairs both similarity scores are defined as zero.
    bool hasPairs = pair.WordPairs.Count != 0;
    pair.LexicalSimilarityScore = hasPairs ? (double)cognates / pair.WordPairs.Count : 0;
    pair.PhoneticSimilarityScore = hasPairs ? phoneticScoreSum / pair.WordPairs.Count : 0;
}
/// <summary>
/// Writes the word pairs, most phonetically similar first. Each entry holds the gloss (with the
/// category in parentheses when present), the annotated alignment and the similarity as a percentage.
/// Entries are separated by a blank line.
/// </summary>
/// <param name="writer">Destination writer.</param>
/// <param name="aligner">Aligner used to recompute the alignment of each pair.</param>
/// <param name="wordPairs">The word pairs to write.</param>
private static void WriteWordPairs(StreamWriter writer, IWordAligner aligner, IEnumerable<WordPair> wordPairs)
{
    bool needsSeparator = false;
    foreach (WordPair pair in wordPairs.OrderByDescending(wp => wp.PhoneticSimilarityScore))
    {
        if (needsSeparator)
        {
            writer.WriteLine();
        }

        Alignment<Word, ShapeNode> alignment = aligner.Compute(pair).GetAlignments().First();

        writer.Write(pair.Word1.Meaning.Gloss);
        bool hasCategory = !string.IsNullOrEmpty(pair.Word1.Meaning.Category);
        if (hasCategory)
        {
            writer.Write(" ({0})", pair.Word1.Meaning.Category);
        }

        writer.WriteLine();
        writer.Write(alignment.ToString(pair.AlignmentNotes));
        writer.WriteLine("Similarity: {0:p}", pair.PhoneticSimilarityScore);

        needsSeparator = true;
    }
}
/// <summary>
/// Rebuilds the rows of the multiple word alignment for the currently selected meaning.
/// </summary>
/// <remarks>
/// Collects the distinct words of the selected meaning from all variety pairs, aligns them (a single
/// word gets a trivial one-sequence alignment built directly from its shape), and repopulates
/// <c>_words</c> with one row per aligned sequence. Does nothing when no meaning is selected; clears
/// the rows when the meaning has no words.
/// </remarks>
private void AlignWords()
{
    if (_selectedMeaning == null)
    {
        return;
    }

    _busyService.ShowBusyIndicatorUntilFinishDrawing();

    // Gather every distinct word that takes part in a word pair for the selected meaning.
    var words = new HashSet<Word>();
    foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
    {
        WordPair wp;
        if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
        {
            words.Add(wp.Word1);
            words.Add(wp.Word2);
        }
    }

    if (words.Count == 0)
    {
        _words.Clear();
        return;
    }

    IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    Alignment<Word, ShapeNode> alignment;
    if (words.Count == 1)
    {
        // A single word is not passed to the aligner; build a one-row alignment from its
        // prefix, stem and suffix nodes instead.
        Word word = words.First();
        Annotation<ShapeNode> prefixAnn = word.Prefix;
        var prefix = new AlignmentCell<ShapeNode>(prefixAnn != null
            ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        IEnumerable<AlignmentCell<ShapeNode>> columns = word.Shape.GetNodes(word.Stem.Span)
            .Where(NodeFilter)
            .Select(n => new AlignmentCell<ShapeNode>(n));
        Annotation<ShapeNode> suffixAnn = word.Suffix;
        var suffix = new AlignmentCell<ShapeNode>(suffixAnn != null
            ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        alignment = new Alignment<Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
    }
    else
    {
        IWordAlignerResult result = aligner.Compute(words);
        alignment = result.GetAlignments().First();
    }

    // Cognate sets ordered with non-noise clusters first, then by descending size.
    List<Cluster<Word>> cognateSets = _projectService.Project.GenerateCognateSets(_selectedMeaning.DomainMeaning)
        .OrderBy(c => c.Noise)
        .ThenByDescending(c => c.DataObjects.Count)
        .ToList();

    ColumnCount = alignment.ColumnCount;
    using (_words.BulkUpdate())
    {
        _words.Clear();
        for (int i = 0; i < alignment.SequenceCount; i++)
        {
            // Copy the loop index into a per-iteration local. The column sequence below is lazily
            // evaluated, and a `for` loop variable is a single variable shared by all iterations;
            // capturing `i` directly would make a late enumeration read the wrong (or an
            // out-of-range) row.
            int sequenceIndex = i;

            AlignmentCell<ShapeNode> prefix = alignment.Prefixes[sequenceIndex];
            Word word = alignment.Sequences[sequenceIndex];
            IEnumerable<AlignmentCell<ShapeNode>> columns = Enumerable.Range(0, alignment.ColumnCount)
                .Select(col => alignment[sequenceIndex, col]);
            AlignmentCell<ShapeNode> suffix = alignment.Suffixes[sequenceIndex];

            // The last cognate set in the ordering is passed as int.MaxValue; every other set
            // (and "not found", index -1) is passed as index + 1.
            int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
            int displayedSetNumber = cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1;

            _words.Add(new MultipleWordAlignmentWordViewModel(word, prefix, columns, suffix, displayedSetNumber));
        }
    }
}
/// <summary>
/// Rebuilds the word pairs of a variety pair, choosing one word pair per meaning, and recomputes the
/// cognate count and cognate sound correspondence counts.
/// </summary>
/// <remarks>
/// Meanings with exactly one non-empty word in each variety are paired directly. Meanings with several
/// candidate words are first paired by best raw alignment score and then revisited one at a time: every
/// candidate pairing is evaluated with the configured cognate identifier against provisional counts, and
/// the pairing that wins <c>Compare</c> is kept.
/// </remarks>
/// <param name="varietyPair">The variety pair to process; its word pairs are cleared and rebuilt.</param>
public void Process(VarietyPair varietyPair) {
    IWordAligner aligner = _project.WordAligners[_alignerID];
    // For each ambiguous meaning: the meaning, the best-raw-score result, and all candidate results.
    var ambiguousMeanings = new List <Tuple <Meaning, IWordAlignerResult, IWordAlignerResult[]> >();
    varietyPair.WordPairs.Clear();
    var cognateCorrCounts = new ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> >();
    int cognateCount = 0;
    // First pass: pair unambiguous meanings for good and ambiguous meanings provisionally.
    foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings) {
        // Words with an empty shape are ignored.
        Word[] words1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        Word[] words2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        if (words1.Length == 1 && words2.Length == 1) {
            Word word1 = words1.Single();
            Word word2 = words2.Single();
            WordPair wp = varietyPair.WordPairs.Add(word1, word2);
            _project.CognacyDecisions.UpdateActualCognacy(wp);
            IWordAlignerResult alignerResult = aligner.Compute(wp);
            _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, alignerResult);
            Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
            if (wp.Cognacy) {
                UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, alignment);
                cognateCount++;
            }
            wp.PhoneticSimilarityScore = alignment.NormalizedScore;
        } else if (words1.Length > 0 && words2.Length > 0) {
            // Several candidates: align every combination and provisionally keep the best raw score.
            IWordAlignerResult[] alignerResults = words1.SelectMany(w1 => words2.Select(w2 => aligner.Compute(w1, w2))).ToArray();
            IWordAlignerResult maxAlignerResult = alignerResults.MaxBy(a => a.BestRawScore);
            ambiguousMeanings.Add(Tuple.Create(meaning, maxAlignerResult, alignerResults));
            WordPair wp = varietyPair.WordPairs.Add(maxAlignerResult.Words[0], maxAlignerResult.Words[1]);
            _thresholdCognateIdentifier.UpdatePredictedCognacy(wp, maxAlignerResult);
        }
    }
    ICognateIdentifier cognateIdentifier = _project.CognateIdentifiers[_cognateIdentifierID];
    // Second pass: settle each ambiguous meaning in turn.
    for (int i = 0; i < ambiguousMeanings.Count; i++) {
        // Start from the counts of everything already settled...
        ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > newCognateCorrCounts = cognateCorrCounts.Clone();
        int newCognateCount = cognateCount;
        // ...and add the provisional pairs of the ambiguous meanings that have not been settled yet.
        for (int
            j = i + 1; j < ambiguousMeanings.Count; j++) {
            if (varietyPair.WordPairs[ambiguousMeanings[j].Item1].Cognacy) {
                UpdateCognateCorrespondenceCounts(aligner, newCognateCorrCounts, ambiguousMeanings[j].Item2.GetAlignments().First());
                newCognateCount++;
            }
        }
        IWordAlignerResult bestAlignerResult = null;
        WordPair bestWordPair = null;
        // Try every candidate pairing for this meaning.
        foreach (IWordAlignerResult alignerResult in ambiguousMeanings[i].Item3) {
            ConditionalFrequencyDistribution <SoundContext, Ngram <Segment> > alignmentCognateCorrCounts = newCognateCorrCounts.Clone();
            int alignmentCognateCount = newCognateCount;
            Alignment <Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
            // Swap the word pair currently stored for this meaning with the candidate.
            varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
            WordPair wordPair = varietyPair.WordPairs.Add(alignerResult.Words[0], alignerResult.Words[1]);
            _thresholdCognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
            if (wordPair.Cognacy) {
                UpdateCognateCorrespondenceCounts(aligner, alignmentCognateCorrCounts, alignment);
                alignmentCognateCount++;
            }
            // Publish the provisional statistics on the variety pair before running the configured
            // identifier. NOTE(review): presumably the identifier reads them from the variety pair - confirm.
            varietyPair.CognateCount = alignmentCognateCount;
            varietyPair.CognateSoundCorrespondenceFrequencyDistribution = alignmentCognateCorrCounts;
            cognateIdentifier.UpdatePredictedCognacy(wordPair, alignerResult);
            wordPair.PhoneticSimilarityScore = alignment.NormalizedScore;
            if (bestWordPair == null || Compare(wordPair, bestWordPair) > 0) {
                bestWordPair = wordPair;
                bestAlignerResult = alignerResult;
            }
        }
        Debug.Assert(bestWordPair != null);
        // Replace whatever candidate was stored last with the winning pair and fold it into the settled counts.
        varietyPair.WordPairs.Remove(ambiguousMeanings[i].Item1);
        varietyPair.WordPairs.Add(bestWordPair);
        _project.CognacyDecisions.UpdateActualCognacy(bestWordPair);
        if (bestWordPair.Cognacy) {
            UpdateCognateCorrespondenceCounts(aligner, cognateCorrCounts, bestAlignerResult.GetAlignments().First());
            cognateCount++;
        }
    }
    // Store the final statistics, overwriting the provisional values set during the second pass.
    varietyPair.CognateCount = cognateCount;
    varietyPair.CognateSoundCorrespondenceFrequencyDistribution = cognateCorrCounts;
}
/// <summary>
/// Categorizes every column of the first alignment and derives the predicted cognacy flag and score
/// from the proportions of category-1 and category-1-or-2 columns.
/// </summary>
/// <remarks>
/// Each column gets a category: 0 (ignored, note "-", excluded from the total), 1, 2 or 3 (the default).
/// The pair is predicted cognate when at least 50% of the counted columns are category 1 and at
/// least 75% are category 1 or 2. When no column is counted at all, the pair is predicted non-cognate
/// with a score of 0 instead of producing a NaN score from a 0/0 division.
/// </remarks>
/// <param name="wordPair">The word pair to update; its alignment notes are cleared first.</param>
/// <param name="alignerResult">The aligner result whose first alignment is examined.</param>
public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    int cat1Count = 0;
    int cat1And2Count = 0;
    int totalCount = 0;
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        ShapeNode uLeftNode = alignment.GetLeftNode(0, column);
        Ngram<Segment> u = alignment[0, column].ToNgram(_segmentPool);
        ShapeNode uRightNode = alignment.GetRightNode(0, column);
        ShapeNode vLeftNode = alignment.GetLeftNode(1, column);
        Ngram<Segment> v = alignment[1, column].ToNgram(_segmentPool);
        ShapeNode vRightNode = alignment.GetRightNode(1, column);

        int cat = 3;
        if (u.Equals(v))
        {
            cat = 1;
        }
        else if (_ignoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
        {
            cat = 0;
        }
        else if (u.Length == 0 || v.Length == 0)
        {
            // Insertion or deletion: similar segments count as category 1; a regular
            // insertion/deletion may be ignored entirely when configured.
            if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
            {
                cat = 1;
            }
            else if (_ignoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, column, v))
            {
                cat = 0;
            }
        }
        else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
        {
            cat = _similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
        }
        else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
        {
            if (_regularConsEqual)
            {
                // Regular consonant correspondences are category 1 whether or not they are similar.
                if (IsRegular(wordPair, alignerResult, alignment, column, v))
                {
                    cat = 1;
                }
                else if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 2;
                }
            }
            else
            {
                // Consonants must be similar to leave category 3; regularity promotes them to 1.
                if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = IsRegular(wordPair, alignerResult, alignment, column, v) ? 1 : 2;
                }
            }
        }

        if (cat > 0 && cat < 3)
        {
            cat1And2Count++;
            if (cat == 1)
            {
                cat1Count++;
            }
        }

        wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
        if (cat > 0)
        {
            totalCount++;
        }
    }

    if (totalCount == 0)
    {
        // Every column was ignored (or there were none): the ratios are undefined, so avoid
        // storing NaN. The prediction stays false, exactly as the NaN comparison yielded.
        wordPair.PredictedCognacy = false;
        wordPair.PredictedCognacyScore = 0.0;
        return;
    }

    double type1Score = (double)cat1Count / totalCount;
    double type1And2Score = (double)cat1And2Count / totalCount;
    wordPair.PredictedCognacy = type1Score >= 0.5 && type1And2Score >= 0.75;
    wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
}
/// <summary>
/// Rebuilds the word pairs of a variety pair, one per meaning, and recomputes the cognate count and
/// the cognate sound correspondence counts.
/// </summary>
/// <remarks>
/// A meaning with exactly one non-empty word on each side is paired directly. When there are several
/// candidates, the combination with the highest best raw alignment score is chosen (the first one
/// found wins ties). Meanings lacking a non-empty word on either side are skipped.
/// </remarks>
/// <param name="varietyPair">The variety pair to process; its word pairs are cleared and rebuilt.</param>
public void Process(VarietyPair varietyPair)
{
    IWordAligner aligner = _project.WordAligners[_alignerID];
    varietyPair.WordPairs.Clear();
    var correspondenceCounts = new ConditionalFrequencyDistribution<SoundContext, Ngram<Segment>>();
    int cognates = 0;

    foreach (Meaning meaning in varietyPair.Variety1.Words.Meanings)
    {
        Word[] candidates1 = varietyPair.Variety1.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();
        Word[] candidates2 = varietyPair.Variety2.Words[meaning].Where(w => w.Shape.Count > 0).ToArray();

        WordPair chosenPair;
        IWordAlignerResult chosenResult;
        if (candidates1.Length == 1 && candidates2.Length == 1)
        {
            chosenPair = varietyPair.WordPairs.Add(candidates1[0], candidates2[0]);
            _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
            chosenResult = aligner.Compute(chosenPair);
        }
        else if (candidates1.Length > 0 && candidates2.Length > 0)
        {
            // Try every combination and keep the one with the strictly highest raw score.
            chosenPair = null;
            chosenResult = null;
            foreach (Word w1 in candidates1)
            {
                foreach (Word w2 in candidates2)
                {
                    IWordAlignerResult candidateResult = aligner.Compute(w1, w2);
                    if (chosenResult == null || candidateResult.BestRawScore > chosenResult.BestRawScore)
                    {
                        chosenPair = new WordPair(w1, w2);
                        chosenResult = candidateResult;
                    }
                }
            }

            Debug.Assert(chosenPair != null);
            varietyPair.WordPairs.Add(chosenPair);
            _project.CognacyDecisions.UpdateActualCognacy(chosenPair);
        }
        else
        {
            continue;
        }

        // Common tail for both cases: predict cognacy, update counts, record the phonetic similarity.
        _thresholdCognateIdentifier.UpdatePredictedCognacy(chosenPair, chosenResult);
        Alignment<Word, ShapeNode> alignment = chosenResult.GetAlignments().First();
        if (chosenPair.Cognacy)
        {
            UpdateCognateCorrespondenceCounts(aligner, correspondenceCounts, alignment);
            cognates++;
        }

        chosenPair.PhoneticSimilarityScore = alignment.NormalizedScore;
    }

    varietyPair.CognateCount = cognates;
    varietyPair.CognateSoundCorrespondenceFrequencyDistribution = correspondenceCounts;
}
/// <summary>
/// Determines whether the correspondence between the first word's sound context at
/// <paramref name="column"/> and the n-gram <paramref name="v"/> is frequent enough among the
/// variety pair's cognates to be considered regular.
/// </summary>
/// <param name="wordPair">Word pair whose variety pair supplies the correspondence counts.</param>
/// <param name="alignerResult">Aligner result; its aligner supplies the contextual sound classes.</param>
/// <param name="alignment">The alignment being inspected.</param>
/// <param name="column">Index of the alignment column.</param>
/// <param name="v">The n-gram of the second word in that column.</param>
/// <returns><c>true</c> when the count of <paramref name="v"/> is at least the applicable threshold.</returns>
private bool IsRegular(WordPair wordPair, IWordAlignerResult alignerResult, Alignment<Word, ShapeNode> alignment, int column, Ngram<Segment> v)
{
    VarietyPair varietyPair = wordPair.VarietyPair;
    SoundContext lhs = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
    FrequencyDistribution<Ngram<Segment>> observed = varietyPair.CognateSoundCorrespondenceFrequencyDistribution[lhs];

    int requiredCount = _defaultRegularCorrepondenceThreshold;
    if (_automaticRegularCorrespondenceThreshold)
    {
        // Occurrences of v summed over all contexts with the same left and right environments.
        int sameEnvironmentCount = varietyPair.CognateSoundCorrespondenceFrequencyDistribution.Conditions
            .Where(sc => sc.LeftEnvironment == lhs.LeftEnvironment && sc.RightEnvironment == lhs.RightEnvironment)
            .Sum(sc => varietyPair.CognateSoundCorrespondenceFrequencyDistribution[sc][v]);

        int tableThreshold;
        bool inTable = _regularCorrespondenceThresholdTable.TryGetThreshold(
            varietyPair.CognateCount, observed.SampleOutcomeCount, sameEnvironmentCount, out tableThreshold);
        if (inTable)
        {
            requiredCount = tableThreshold;
        }
    }

    return observed[v] >= requiredCount;
}
/// <summary>
/// Uses the normalized score of the first alignment as the cognacy score and predicts
/// cognates when that score reaches the configured threshold.
/// </summary>
/// <param name="wordPair">The word pair whose score and prediction are updated.</param>
/// <param name="alignerResult">The aligner result supplying the alignment.</param>
public void UpdateCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    Alignment<Word, ShapeNode> firstAlignment = alignerResult.GetAlignments().First();
    wordPair.CognacyScore = firstAlignment.NormalizedScore;

    bool reachesThreshold = wordPair.CognacyScore >= _threshold;
    wordPair.AreCognatePredicted = reachesThreshold;
}
/// <summary>
/// Categorizes every column of the first alignment and derives the predicted cognate flag and
/// cognicity score from the proportions of category-1 and category-1-or-2 columns.
/// </summary>
/// <remarks>
/// Each column gets a category: 0 (ignored, note "-", excluded from the total), 1, 2 or 3 (the default).
/// A correspondence is treated as regular when its count in the variety pair's sound change
/// frequency distribution is at least 3. The pair is predicted cognate when at least 50% of the
/// counted columns are category 1 and at least 75% are category 1 or 2. When no column is counted,
/// the pair is predicted non-cognate with a score of 0 instead of producing a NaN score from 0/0.
/// </remarks>
/// <param name="wordPair">The word pair to update; its alignment notes are cleared first.</param>
/// <param name="alignerResult">The aligner result whose first alignment is examined.</param>
public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    int cat1Count = 0;
    int cat1And2Count = 0;
    int totalCount = 0;
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        ShapeNode uLeftNode = alignment.GetLeftNode(0, column);
        Ngram<Segment> u = alignment[0, column].ToNgram(_segmentPool);
        ShapeNode uRightNode = alignment.GetRightNode(0, column);
        ShapeNode vLeftNode = alignment.GetLeftNode(1, column);
        Ngram<Segment> v = alignment[1, column].ToNgram(_segmentPool);
        ShapeNode vRightNode = alignment.GetRightNode(1, column);

        // Regularity is evaluated for every column, before the category is decided.
        SoundContext context = alignment.ToSoundContext(_segmentPool, 0, column, alignerResult.WordAligner.ContextualSoundClasses);
        bool regular = wordPair.VarietyPair.SoundChangeFrequencyDistribution[context][v] >= 3;

        int cat = 3;
        if (u.Equals(v))
        {
            cat = 1;
        }
        else if (_ignoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
        {
            cat = 0;
        }
        else if (u.Length == 0 || v.Length == 0)
        {
            // Insertion or deletion: a similar or regular one is either ignored or category 1,
            // depending on the configuration.
            if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) || regular)
            {
                cat = _ignoreRegularInsertionDeletion ? 0 : 1;
            }
        }
        else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
        {
            cat = _similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
        }
        else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
        {
            if (_regularConsEqual)
            {
                // Regular consonant correspondences are category 1 whether or not they are similar.
                if (regular)
                {
                    cat = 1;
                }
                else if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 2;
                }
            }
            else
            {
                // Consonants must be similar to leave category 3; regularity promotes them to 1.
                if (_similarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = regular ? 1 : 2;
                }
            }
        }

        if (cat > 0 && cat < 3)
        {
            cat1And2Count++;
            if (cat == 1)
            {
                cat1Count++;
            }
        }

        wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
        if (cat > 0)
        {
            totalCount++;
        }
    }

    if (totalCount == 0)
    {
        // Every column was ignored (or there were none): the ratios are undefined, so avoid
        // storing NaN. The prediction stays false, exactly as the NaN comparison yielded.
        wordPair.AreCognatePredicted = false;
        wordPair.CognicityScore = 0.0;
        return;
    }

    double type1Score = (double)cat1Count / totalCount;
    double type1And2Score = (double)cat1And2Count / totalCount;
    wordPair.AreCognatePredicted = type1Score >= 0.5 && type1And2Score >= 0.75;
    wordPair.CognicityScore = (type1Score * 0.75) + (type1And2Score * 0.25);
}
/// <summary>
/// Uses the normalized score of the first alignment as the predicted cognacy score and predicts
/// cognacy when that score reaches the configured threshold.
/// </summary>
/// <param name="wordPair">The word pair whose score and prediction are updated.</param>
/// <param name="alignerResult">The aligner result supplying the alignment.</param>
public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    Alignment<Word, ShapeNode> firstAlignment = alignerResult.GetAlignments().First();
    wordPair.PredictedCognacyScore = firstAlignment.NormalizedScore;

    bool reachesThreshold = wordPair.PredictedCognacyScore >= _threshold;
    wordPair.PredictedCognacy = reachesThreshold;
}
/// <summary>
/// Categorizes every column of the first alignment and derives the predicted cognacy flag and score
/// from the proportions of category-1 and category-1-or-2 columns.
/// </summary>
/// <remarks>
/// Each column gets a category: 0 (ignored, note "-", excluded from the total), 1, 2 or 3 (the default).
/// The pair is predicted cognate when at least 50% of the counted columns are category 1 and at
/// least 75% are category 1 or 2. When no column is counted at all, the pair is predicted non-cognate
/// with a score of 0 instead of producing a NaN score from a 0/0 division.
/// </remarks>
/// <param name="wordPair">The word pair to update; its alignment notes are cleared first.</param>
/// <param name="alignerResult">The aligner result whose first alignment is examined.</param>
public void UpdatePredictedCognacy(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    int cat1Count = 0;
    int cat1And2Count = 0;
    int totalCount = 0;
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();
    for (int column = 0; column < alignment.ColumnCount; column++)
    {
        ShapeNode uLeftNode = alignment.GetLeftNode(0, column);
        Ngram<Segment> u = alignment[0, column].ToNgram(_segmentPool);
        ShapeNode uRightNode = alignment.GetRightNode(0, column);
        ShapeNode vLeftNode = alignment.GetLeftNode(1, column);
        Ngram<Segment> v = alignment[1, column].ToNgram(_segmentPool);
        ShapeNode vRightNode = alignment.GetRightNode(1, column);

        int cat = 3;
        if (u.Equals(v))
        {
            cat = 1;
        }
        else if (IgnoredMappings.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
        {
            cat = 0;
        }
        else if (u.Length == 0 || v.Length == 0)
        {
            // Insertion or deletion: similar segments count as category 1; a regular
            // insertion/deletion may be ignored entirely when configured.
            if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
            {
                cat = 1;
            }
            else if (IgnoreRegularInsertionDeletion && IsRegular(wordPair, alignerResult, alignment, column, v))
            {
                cat = 0;
            }
        }
        else if (u[0].Type == CogFeatureSystem.VowelType && v[0].Type == CogFeatureSystem.VowelType)
        {
            cat = SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode) ? 1 : 2;
        }
        else if (u[0].Type == CogFeatureSystem.ConsonantType && v[0].Type == CogFeatureSystem.ConsonantType)
        {
            if (RegularConsonantEqual)
            {
                // Regular consonant correspondences are category 1 whether or not they are similar.
                if (IsRegular(wordPair, alignerResult, alignment, column, v))
                {
                    cat = 1;
                }
                else if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = 2;
                }
            }
            else
            {
                // Consonants must be similar to leave category 3; regularity promotes them to 1.
                if (SimilarSegments.IsMapped(uLeftNode, u, uRightNode, vLeftNode, v, vRightNode))
                {
                    cat = IsRegular(wordPair, alignerResult, alignment, column, v) ? 1 : 2;
                }
            }
        }

        if (cat > 0 && cat < 3)
        {
            cat1And2Count++;
            if (cat == 1)
            {
                cat1Count++;
            }
        }

        wordPair.AlignmentNotes.Add(cat == 0 ? "-" : cat.ToString(CultureInfo.InvariantCulture));
        if (cat > 0)
        {
            totalCount++;
        }
    }

    if (totalCount == 0)
    {
        // Every column was ignored (or there were none): the ratios are undefined, so avoid
        // storing NaN. The prediction stays false, exactly as the NaN comparison yielded.
        wordPair.PredictedCognacy = false;
        wordPair.PredictedCognacyScore = 0.0;
        return;
    }

    double type1Score = (double)cat1Count / totalCount;
    double type1And2Score = (double)cat1And2Count / totalCount;
    wordPair.PredictedCognacy = type1Score >= 0.5 && type1And2Score >= 0.75;
    wordPair.PredictedCognacyScore = (type1Score * 0.75) + (type1And2Score * 0.25);
}