/// <summary>
/// Builds an <c>Alignment&lt;string, char&gt;</c> from textual row descriptions.
/// </summary>
/// <param name="alignment">
/// One string per sequence, written as three '|'-separated parts: prefix, space-separated
/// cells, and suffix. A cell written as "-" becomes a cell constructed with no items; any other
/// cell is constructed from that cell's text.
/// </param>
/// <returns>
/// An alignment whose sequence strings are the prefix, the non-"-" cells and the suffix
/// concatenated in that order.
/// </returns>
protected static Alignment<string, char> CreateAlignment(params string[] alignment)
{
    var rows = new Tuple<string, AlignmentCell<char>, IEnumerable<AlignmentCell<char>>, AlignmentCell<char>>[alignment.Length];

    for (int row = 0; row < alignment.Length; row++)
    {
        string[] parts = alignment[row].Split('|');

        string prefix = parts[0].Trim();
        var text = new StringBuilder(prefix);

        string[] tokens = parts[1].Trim().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        var cells = new AlignmentCell<char>[tokens.Length];
        for (int col = 0; col < tokens.Length; col++)
        {
            string token = tokens[col];
            if (token != "-")
            {
                // Only real cells contribute to the reconstructed sequence string.
                text.Append(token);
                cells[col] = new AlignmentCell<char>(token);
            }
            else
            {
                cells[col] = new AlignmentCell<char>();
            }
        }

        string suffix = parts[2].Trim();
        text.Append(suffix);

        IEnumerable<AlignmentCell<char>> cellSequence = cells;
        rows[row] = Tuple.Create(text.ToString(), new AlignmentCell<char>(prefix), cellSequence, new AlignmentCell<char>(suffix));
    }

    return new Alignment<string, char>(0, 0, rows);
}
/// <summary>
/// Creates a view model for a pair of aligned cells, storing the column index,
/// both cells and the note in the corresponding private fields.
/// </summary>
/// <param name="column">The column index to store.</param>
/// <param name="cell1">The first alignment cell.</param>
/// <param name="cell2">The second alignment cell.</param>
/// <param name="note">The note to store; may be null (the two-cell overload passes null).</param>
public AlignedNodeViewModel(int column, AlignmentCell<ShapeNode> cell1, AlignmentCell<ShapeNode> cell2, string note)
{
    _cell1 = cell1;
    _cell2 = cell2;
    _column = column;
    _note = note;
}
/// <summary>
/// Initializes the view model from a column index, the two aligned cells and a note.
/// Each argument is stored as-is in its backing field.
/// </summary>
/// <param name="column">The column index to store.</param>
/// <param name="cell1">The first alignment cell.</param>
/// <param name="cell2">The second alignment cell.</param>
/// <param name="note">The note to store.</param>
public AlignedNodeViewModel(int column, AlignmentCell<ShapeNode> cell1, AlignmentCell<ShapeNode> cell2, string note)
{
    _note = note;
    _column = column;
    _cell1 = cell1;
    _cell2 = cell2;
}
/// <summary>
/// Finds the shape node to the right of the cell at (<paramref name="seq"/>, <paramref name="col"/>).
/// </summary>
/// <param name="alignment">The alignment to inspect.</param>
/// <param name="seq">Index of the sequence (row).</param>
/// <param name="col">Index of the column.</param>
/// <returns>
/// For a non-null cell: the next node after the cell's last node that passes <c>NodeFilter</c>.
/// For a null cell: the first node of the nearest non-empty cell to the right (or, if that node
/// does not pass <c>NodeFilter</c>, the next node that does); when no such cell exists, the
/// <c>End</c> node of the sequence's shape.
/// </returns>
public static ShapeNode GetRightNode(this Alignment<Word, ShapeNode> alignment, int seq, int col)
{
    AlignmentCell<ShapeNode> current = alignment[seq, col];
    if (!current.IsNull)
    {
        return current.Last.GetNext(NodeFilter);
    }

    // The cell is a gap: scan rightwards for the first cell that actually holds nodes.
    for (int next = col + 1; next < alignment.ColumnCount; next++)
    {
        AlignmentCell<ShapeNode> candidate = alignment[seq, next];
        if (candidate.Count == 0)
        {
            continue;
        }

        ShapeNode first = candidate.First;
        return NodeFilter(first) ? first : first.GetNext(NodeFilter);
    }

    // Nothing but gaps to the right.
    return alignment.Sequences[seq].Shape.End;
}
/// <summary>
/// Finds the shape node to the left of the cell at (<paramref name="seq"/>, <paramref name="col"/>).
/// </summary>
/// <param name="alignment">The alignment to inspect.</param>
/// <param name="seq">Index of the sequence (row).</param>
/// <param name="col">Index of the column.</param>
/// <returns>
/// For a non-null cell: the previous node before the cell's first node that passes <c>NodeFilter</c>.
/// For a null cell: the last node of the nearest non-empty cell to the left (or, if that node
/// does not pass <c>NodeFilter</c>, the previous node that does); when no such cell exists, the
/// <c>Begin</c> node of the sequence's shape.
/// </returns>
public static ShapeNode GetLeftNode(this Alignment<Word, ShapeNode> alignment, int seq, int col)
{
    AlignmentCell<ShapeNode> current = alignment[seq, col];
    if (!current.IsNull)
    {
        return current.First.GetPrev(NodeFilter);
    }

    // The cell is a gap: scan leftwards for the first cell that actually holds nodes.
    for (int prev = col - 1; prev >= 0; prev--)
    {
        AlignmentCell<ShapeNode> candidate = alignment[seq, prev];
        if (candidate.Count == 0)
        {
            continue;
        }

        ShapeNode last = candidate.Last;
        return NodeFilter(last) ? last : last.GetPrev(NodeFilter);
    }

    // Nothing but gaps to the left.
    return alignment.Sequences[seq].Shape.Begin;
}
/// <summary>
/// Recomputes the alignment notes, the cognate prediction and the cognicity score of
/// <paramref name="wordPair"/> from the first alignment in <paramref name="alignerResult"/>.
/// </summary>
/// <remarks>
/// One note is added per alignment column:
/// "X" when either cell starts with a node whose type is <c>CogFeatureSystem.VowelType</c>;
/// "1" when both cells have the same string representation or both resolve to the same sound class;
/// "0" otherwise. Columns marked "1" are counted only until the first "0" is produced.
/// </remarks>
/// <param name="wordPair">The word pair whose notes and scores are overwritten.</param>
/// <param name="alignerResult">The aligner result; its first alignment is used.</param>
public void UpdateCognicity(WordPair wordPair, IWordAlignerResult alignerResult)
{
    wordPair.AlignmentNotes.Clear();
    Alignment<Word, ShapeNode> alignment = alignerResult.GetAlignments().First();

    int leadingMatches = 0;
    bool sawMismatch = false;

    for (int col = 0; col < alignment.ColumnCount; col++)
    {
        AlignmentCell<ShapeNode> first = alignment[0, col];
        AlignmentCell<ShapeNode> second = alignment[1, col];

        // Columns containing a vowel are marked and otherwise ignored.
        bool firstIsVowel = first.Count > 0 && first[0].Type() == CogFeatureSystem.VowelType;
        if (firstIsVowel || (second.Count > 0 && second[0].Type() == CogFeatureSystem.VowelType))
        {
            wordPair.AlignmentNotes.Add("X");
            continue;
        }

        bool equivalent = first.StrRep() == second.StrRep();
        if (!equivalent)
        {
            // Different surface forms: fall back to comparing the matching sound classes.
            SoundClass class1;
            bool found1 = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 0, col, out class1);
            SoundClass class2;
            bool found2 = _soundClasses.TryGetMatchingSoundClass(_segmentPool, alignment, 1, col, out class2);
            if (!found1)
            {
                class1 = null;
            }
            if (!found2)
            {
                class2 = null;
            }

            equivalent = class1 != null && class2 != null && class1 == class2;
        }

        if (equivalent)
        {
            wordPair.AlignmentNotes.Add("1");
            if (!sawMismatch)
            {
                leadingMatches++;
            }
        }
        else
        {
            wordPair.AlignmentNotes.Add("0");
            sawMismatch = true;
        }
    }

    wordPair.AreCognatePredicted = !sawMismatch || leadingMatches >= _initialEquivalenceThreshold;
    // NOTE(review): if the alignment has zero columns this evaluates 0.0 / 0 and stores NaN — confirm
    // whether callers expect that.
    wordPair.CognicityScore = (double)leadingMatches / alignment.ColumnCount;
}
/// <summary>
/// Creates the row view model for one word of a multiple word alignment.
/// </summary>
/// <param name="word">The word this row represents; its variety is wrapped in a <c>VarietyViewModel</c>.</param>
/// <param name="prefix">The prefix cell; stored as its string representation.</param>
/// <param name="columns">The alignment cells of the row; null cells are shown as "-".</param>
/// <param name="suffix">The suffix cell; stored as its string representation.</param>
/// <param name="cognateSetIndex">The cognate set index to store.</param>
public MultipleWordAlignmentWordViewModel(Word word, AlignmentCell<ShapeNode> prefix, IEnumerable<AlignmentCell<ShapeNode>> columns,
    AlignmentCell<ShapeNode> suffix, int cognateSetIndex)
{
    _word = word;
    _variety = new VarietyViewModel(word.Variety);
    _prefix = prefix.StrRep();

    // Render every column eagerly so the stored list does not depend on the incoming sequence.
    var columnTexts = new List<string>();
    foreach (AlignmentCell<ShapeNode> cell in columns)
    {
        if (cell.IsNull)
        {
            columnTexts.Add("-");
        }
        else
        {
            columnTexts.Add(cell.StrRep());
        }
    }
    _columns = new ReadOnlyList<string>(columnTexts.ToArray());

    _suffix = suffix.StrRep();
    _cognateSetIndex = cognateSetIndex;
}
/// <summary>
/// Creates the row view model for one word of a multiple word alignment.
/// </summary>
/// <param name="parent">The owning alignment view model.</param>
/// <param name="word">The word this row represents.</param>
/// <param name="prefix">The prefix cell; stored as its string representation.</param>
/// <param name="columns">The alignment cells of the row; null cells are shown as "-".</param>
/// <param name="suffix">The suffix cell; stored as its string representation.</param>
/// <param name="cognateSetIndex">The cognate set index to store.</param>
public MultipleWordAlignmentWordViewModel(MultipleWordAlignmentViewModel parent, Word word, AlignmentCell<ShapeNode> prefix,
    IEnumerable<AlignmentCell<ShapeNode>> columns, AlignmentCell<ShapeNode> suffix, int cognateSetIndex)
{
    _word = word;

    // The variety view model receives 0 when the variety has exactly one word for this
    // meaning, otherwise whatever IndexOf reports for this word among them.
    ReadOnlyCollection<Word> wordsForMeaning = word.Variety.Words[word.Meaning];
    int wordIndex;
    if (wordsForMeaning.Count == 1)
    {
        wordIndex = 0;
    }
    else
    {
        wordIndex = IndexOf(wordsForMeaning, word);
    }
    _variety = new MultipleWordAlignmentVarietyViewModel(word.Variety, wordIndex);

    _prefix = prefix.StrRep();

    var columnTexts = new List<string>();
    foreach (AlignmentCell<ShapeNode> cell in columns)
    {
        columnTexts.Add(cell.IsNull ? "-" : cell.StrRep());
    }
    _columns = new ReadOnlyList<string>(columnTexts.ToArray());

    _suffix = suffix.StrRep();
    _cognateSetIndex = cognateSetIndex;
    _parent = parent;
}
/// <summary>
/// Collects sound correspondences from the cognate word pairs of <paramref name="data"/>,
/// grouped by syllable position (onset, nucleus, coda), and replaces the contents of
/// <c>data.CognateSoundCorrespondencesByPosition</c> with the result.
/// </summary>
/// <remarks>
/// Only columns where both cells hold exactly one node, both nodes carry the same
/// <c>SyllablePosition</c> value, and the two segments differ contribute a correspondence.
/// </remarks>
/// <param name="data">The variety pair whose word pairs are analysed and whose correspondences are updated.</param>
public void Process(VarietyPair data)
{
    IWordAligner aligner = _project.WordAligners[_alignerId];

    var byPosition = new Dictionary<FeatureSymbol, SoundCorrespondenceCollection>();
    byPosition.Add(CogFeatureSystem.Onset, new SoundCorrespondenceCollection());
    byPosition.Add(CogFeatureSystem.Nucleus, new SoundCorrespondenceCollection());
    byPosition.Add(CogFeatureSystem.Coda, new SoundCorrespondenceCollection());

    var cognatePairs = data.WordPairs.Where(wp => wp.Cognacy);
    foreach (WordPair wordPair in cognatePairs)
    {
        Alignment<Word, ShapeNode> alignment = aligner.Compute(wordPair).GetAlignments().First();
        for (int col = 0; col < alignment.ColumnCount; col++)
        {
            AlignmentCell<ShapeNode> cell1 = alignment[0, col];
            AlignmentCell<ShapeNode> cell2 = alignment[1, col];

            // Skip gaps and multi-node cells.
            if (cell1.IsNull || cell2.IsNull || cell1.Count != 1 || cell2.Count != 1)
            {
                continue;
            }

            SymbolicFeatureValue pos1, pos2;
            if (!cell1.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos1))
            {
                continue;
            }
            if (!cell2.First.Annotation.FeatureStruct.TryGetValue(CogFeatureSystem.SyllablePosition, out pos2))
            {
                continue;
            }

            // Both nodes must sit in the same syllable position.
            FeatureSymbol position = (FeatureSymbol)pos1;
            if (!(position == (FeatureSymbol)pos2))
            {
                continue;
            }

            Ngram<Segment> ngram1 = cell1.ToNgram(_segmentPool);
            Ngram<Segment> ngram2 = cell2.ToNgram(_segmentPool);
            Segment seg1 = ngram1.First;
            Segment seg2 = ngram2.First;
            if (seg1.Equals(seg2))
            {
                // Identical segments are not recorded as a correspondence.
                continue;
            }

            SoundCorrespondenceCollection correspondences = byPosition[position];
            SoundCorrespondence corr;
            if (!correspondences.TryGet(seg1, seg2, out corr))
            {
                corr = new SoundCorrespondence(seg1, seg2);
                correspondences.Add(corr);
            }
            corr.Frequency++;
            corr.WordPairs.Add(wordPair);
        }
    }

    foreach (KeyValuePair<FeatureSymbol, SoundCorrespondenceCollection> entry in byPosition)
    {
        data.CognateSoundCorrespondencesByPosition[entry.Key].ReplaceAll(entry.Value);
    }
}
/// <summary>
/// Initializes a row of the multiple word alignment view for a single word.
/// </summary>
/// <param name="parent">The owning alignment view model.</param>
/// <param name="word">The word this row represents.</param>
/// <param name="prefix">The prefix cell; stored as its string representation.</param>
/// <param name="columns">The alignment cells of the row; null cells are rendered as "-".</param>
/// <param name="suffix">The suffix cell; stored as its string representation.</param>
/// <param name="cognateSetIndex">The cognate set index to store.</param>
public MultipleWordAlignmentWordViewModel(MultipleWordAlignmentViewModel parent, Word word, AlignmentCell<ShapeNode> prefix,
    IEnumerable<AlignmentCell<ShapeNode>> columns, AlignmentCell<ShapeNode> suffix, int cognateSetIndex)
{
    _word = word;

    ReadOnlyCollection<Word> siblings = word.Variety.Words[word.Meaning];
    // A variety with a single word for the meaning is given index 0; otherwise IndexOf decides.
    int indexInVariety = 0;
    if (siblings.Count != 1)
    {
        indexInVariety = IndexOf(siblings, word);
    }
    _variety = new MultipleWordAlignmentVarietyViewModel(word.Variety, indexInVariety);

    _prefix = prefix.StrRep();

    string[] renderedColumns = columns
        .Select(cell =>
        {
            if (cell.IsNull)
            {
                return "-";
            }
            return cell.StrRep();
        })
        .ToArray();
    _columns = new ReadOnlyList<string>(renderedColumns);

    _suffix = suffix.StrRep();
    _cognateSetIndex = cognateSetIndex;
    _parent = parent;
}
/// <summary>
/// Rebuilds <c>_words</c> with one row per word that takes part in a word pair for the
/// currently selected meaning, and updates <c>ColumnCount</c> to match the alignment used.
/// </summary>
/// <remarks>
/// Does nothing when no meaning is selected. When no word pair exists for the meaning the
/// row collection is simply cleared. A single word is wrapped in a hand-built one-sequence
/// alignment; two or more words are aligned with the project's primary word aligner.
/// </remarks>
private void AlignWords()
{
    if (_selectedMeaning == null)
    {
        return;
    }

    _busyService.ShowBusyIndicatorUntilFinishDrawing();

    // Gather every distinct word that appears in a word pair for the selected meaning.
    var words = new HashSet<Word>();
    foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
    {
        WordPair wp;
        if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
        {
            words.Add(wp.Word1);
            words.Add(wp.Word2);
        }
    }

    if (words.Count == 0)
    {
        _words.Clear();
        return;
    }

    IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    Alignment<Word, ShapeNode> alignment;
    if (words.Count == 1)
    {
        // Only one word: build the alignment directly from its prefix, stem and suffix nodes.
        Word word = words.First();
        Annotation<ShapeNode> prefixAnn = word.Prefix;
        var prefix = new AlignmentCell<ShapeNode>(prefixAnn != null
            ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        IEnumerable<AlignmentCell<ShapeNode>> columns = word.Shape.GetNodes(word.Stem.Span)
            .Where(NodeFilter)
            .Select(n => new AlignmentCell<ShapeNode>(n));
        Annotation<ShapeNode> suffixAnn = word.Suffix;
        var suffix = new AlignmentCell<ShapeNode>(suffixAnn != null
            ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        alignment = new Alignment<Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
    }
    else
    {
        IWordAlignerResult result = aligner.Compute(words);
        alignment = result.GetAlignments().First();
    }

    List<Cluster<Word>> cognateSets = _projectService.Project.GenerateCognateSets(_selectedMeaning.DomainMeaning)
        .OrderBy(c => c.Noise)
        .ThenByDescending(c => c.DataObjects.Count)
        .ToList();

    ColumnCount = alignment.ColumnCount;
    using (_words.BulkUpdate())
    {
        _words.Clear();
        for (int i = 0; i < alignment.SequenceCount; i++)
        {
            // The column query below is deferred. A `for` counter is one variable shared by all
            // iterations, so capture a per-iteration copy to keep the query bound to this row
            // no matter when it is enumerated.
            int sequenceIndex = i;

            AlignmentCell<ShapeNode> prefix = alignment.Prefixes[sequenceIndex];
            Word word = alignment.Sequences[sequenceIndex];
            IEnumerable<AlignmentCell<ShapeNode>> columns = Enumerable.Range(0, alignment.ColumnCount)
                .Select(col => alignment[sequenceIndex, col]);
            AlignmentCell<ShapeNode> suffix = alignment.Suffixes[sequenceIndex];

            // Words in the last cluster of the ordering get int.MaxValue; all others get a 1-based index.
            int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
            _words.Add(new MultipleWordAlignmentWordViewModel(word, prefix, columns, suffix,
                cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1));
        }
    }
}
/// <summary>
/// Rebuilds <c>_words</c> with one row per word that takes part in a word pair for the
/// currently selected meaning, updates <c>ColumnCount</c>, and clears the <c>IsEmpty</c> flag.
/// </summary>
/// <remarks>
/// Does nothing when no meaning is selected or when <c>AreAllVarietiesCompared</c> is false.
/// When no word pair exists for the meaning the row collection is cleared and the method
/// returns without touching <c>IsEmpty</c>. A single word is wrapped in a hand-built
/// one-sequence alignment; two or more words are aligned with the project's primary word aligner.
/// </remarks>
private void AlignWords()
{
    if (_selectedMeaning == null || !_projectService.AreAllVarietiesCompared)
    {
        return;
    }

    _busyService.ShowBusyIndicatorUntilFinishDrawing();

    // Gather every distinct word that appears in a word pair for the selected meaning.
    var words = new HashSet<Word>();
    foreach (VarietyPair vp in _projectService.Project.VarietyPairs)
    {
        WordPair wp;
        if (vp.WordPairs.TryGetValue(_selectedMeaning.DomainMeaning, out wp))
        {
            words.Add(wp.Word1);
            words.Add(wp.Word2);
        }
    }

    if (words.Count == 0)
    {
        _words.Clear();
        return;
    }

    IWordAligner aligner = _projectService.Project.WordAligners[ComponentIdentifiers.PrimaryWordAligner];
    Alignment<Word, ShapeNode> alignment;
    if (words.Count == 1)
    {
        // Only one word: build the alignment directly from its prefix, stem and suffix nodes.
        Word word = words.First();
        Annotation<ShapeNode> prefixAnn = word.Prefix;
        var prefix = new AlignmentCell<ShapeNode>(prefixAnn != null
            ? word.Shape.GetNodes(prefixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        IEnumerable<AlignmentCell<ShapeNode>> columns = word.Shape.GetNodes(word.Stem.Span)
            .Where(NodeFilter)
            .Select(n => new AlignmentCell<ShapeNode>(n));
        Annotation<ShapeNode> suffixAnn = word.Suffix;
        var suffix = new AlignmentCell<ShapeNode>(suffixAnn != null
            ? word.Shape.GetNodes(suffixAnn.Span).Where(NodeFilter)
            : Enumerable.Empty<ShapeNode>());
        alignment = new Alignment<Word, ShapeNode>(0, 0, Tuple.Create(word, prefix, columns, suffix));
    }
    else
    {
        IWordAlignerResult result = aligner.Compute(words);
        alignment = result.GetAlignments().First();
    }

    List<Cluster<Word>> cognateSets = _projectService.Project.GenerateCognateSets(_selectedMeaning.DomainMeaning)
        .OrderBy(c => c.Noise)
        .ThenByDescending(c => c.DataObjects.Count)
        .ToList();

    ColumnCount = alignment.ColumnCount;
    using (_words.BulkUpdate())
    {
        _words.Clear();
        for (int i = 0; i < alignment.SequenceCount; i++)
        {
            // The column query below is deferred. A `for` counter is one variable shared by all
            // iterations, so capture a per-iteration copy to keep the query bound to this row
            // no matter when it is enumerated.
            int sequenceIndex = i;

            AlignmentCell<ShapeNode> prefix = alignment.Prefixes[sequenceIndex];
            Word word = alignment.Sequences[sequenceIndex];
            IEnumerable<AlignmentCell<ShapeNode>> columns = Enumerable.Range(0, alignment.ColumnCount)
                .Select(col => alignment[sequenceIndex, col]);
            AlignmentCell<ShapeNode> suffix = alignment.Suffixes[sequenceIndex];

            // Words in the last cluster of the ordering get int.MaxValue; all others get a 1-based index.
            int cognateSetIndex = cognateSets.FindIndex(set => set.DataObjects.Contains(word));
            _words.Add(new MultipleWordAlignmentWordViewModel(this, word, prefix, columns, suffix,
                cognateSetIndex == cognateSets.Count - 1 ? int.MaxValue : cognateSetIndex + 1));
        }
    }

    IsEmpty = false;
}
/// <summary>
/// Creates a header cell with the given text and alignment.
/// </summary>
/// <param name="text">Value assigned to <c>Text</c>.</param>
/// <param name="align">Value assigned to <c>Align</c>.</param>
internal HeadCell(string text, AlignmentCell align)
{
    this.Text = text;
    this.Align = align;
}
/// <summary>
/// Creates a view model for a pair of aligned cells without a column or note.
/// Delegates to the four-argument constructor with a column of -1 and a null note.
/// </summary>
/// <param name="cell1">The first alignment cell.</param>
/// <param name="cell2">The second alignment cell.</param>
public AlignedNodeViewModel(AlignmentCell<ShapeNode> cell1, AlignmentCell<ShapeNode> cell2)
    : this(-1, cell1, cell2, null)
{
}
/// <summary>
/// Convenience constructor taking only the two aligned cells.
/// Forwards to the four-argument overload, passing -1 as the column and null as the note.
/// </summary>
/// <param name="cell1">The first alignment cell.</param>
/// <param name="cell2">The second alignment cell.</param>
public AlignedNodeViewModel(AlignmentCell<ShapeNode> cell1, AlignmentCell<ShapeNode> cell2)
    : this(-1, cell1, cell2, null)
{
}