/// <summary>
/// Computes a multiple alignment of <c>_sequences</c> and stores it in <c>_result</c>.
/// </summary>
/// <remarks>
/// The sequences are clustered with neighbor joining, using one minus the normalized score of
/// the first pairwise alignment as the distance. The resulting tree is rooted and processed via
/// a node stack: at each node the profiles of its children (plus the node's own profile when it
/// holds exactly one data object) are merged two at a time by profile-to-profile alignment until
/// a single profile remains. When <c>UseInputOrder</c> is set, the rows of the final alignment
/// are re-emitted in the order of <c>_sequences</c>.
/// </remarks>
public void Compute()
{
    // Distance between two sequences = 1 - normalized score of their first pairwise alignment.
    var clusterer = new NeighborJoiningClusterer<TSeq>((first, second) =>
    {
        var aligner = new PairwiseAlignmentAlgorithm<TSeq, TItem>(_scorer, first, second, _itemsSelector);
        aligner.Compute();
        return 1.0 - aligner.GetAlignments().First().NormalizedScore;
    });
    IUndirectedGraph<Cluster<TSeq>, ClusterEdge<TSeq>> unrootedTree = clusterer.GenerateClusters(_sequences);
    IBidirectionalGraph<Cluster<TSeq>, ClusterEdge<TSeq>> rootedTree = unrootedTree.ToRootedTree();

    var profilesByNode = new Dictionary<Cluster<TSeq>, Profile<TSeq, TItem>>();
    var pendingNodes = new Stack<Cluster<TSeq>>();
    Cluster<TSeq> root = rootedTree.Roots().First();

    double maxWeight = double.MinValue;
    if (root.DataObjects.Count == 1)
    {
        // The root itself carries a sequence, so it gets a profile with weight 0.
        profilesByNode[root] = CreateProfile(root.DataObjects.First(), 0);
        maxWeight = 0;
    }
    pendingNodes.Push(root);

    // NOTE(review): CalcSequenceWeights is defined elsewhere; the loop below relies on it having
    // pushed the descendant nodes onto the stack and registered their profiles - confirm.
    foreach (ClusterEdge<TSeq> rootEdge in rootedTree.OutEdges(root))
    {
        double branchWeight = CalcSequenceWeights(rootedTree, rootEdge, 0, pendingNodes, profilesByNode);
        maxWeight = Math.Max(maxWeight, branchWeight);
    }

    // Shift the first weight of every profile by the same amount (1 - largest weight seen).
    double weightShift = 1.0 - maxWeight;
    foreach (Profile<TSeq, TItem> existing in profilesByNode.Values)
    {
        existing.Weights[0] += weightShift;
    }

    var profileScorer = new ProfileScorer<TSeq, TItem>(_scorer);
    while (pendingNodes.Count > 0)
    {
        Cluster<TSeq> current = pendingNodes.Pop();

        // Collect (and unregister) every profile that has to be merged at this node.
        var toMerge = new Stack<Profile<TSeq, TItem>>();
        foreach (ClusterEdge<TSeq> childEdge in rootedTree.OutEdges(current))
        {
            Cluster<TSeq> child = childEdge.Target;
            toMerge.Push(profilesByNode[child]);
            profilesByNode.Remove(child);
        }
        if (node_HasOwnSequence(current))
        {
            toMerge.Push(profilesByNode[current]);
            profilesByNode.Remove(current);
        }

        while (toMerge.Count > 1)
        {
            Profile<TSeq, TItem> left = toMerge.Pop();
            Profile<TSeq, TItem> right = toMerge.Pop();

            var profileAligner = new PairwiseAlignmentAlgorithm<Profile<TSeq, TItem>, AlignmentCell<TItem>[]>(
                profileScorer, left, right, GetProfileItems);
            profileAligner.Compute();
            Alignment<Profile<TSeq, TItem>, AlignmentCell<TItem>[]> profileAlignment =
                profileAligner.GetAlignments().First();

            var rows = new List<Tuple<TSeq, AlignmentCell<TItem>, IEnumerable<AlignmentCell<TItem>>, AlignmentCell<TItem>>>();

            // Row 0 of the profile alignment belongs to "left", row 1 to "right"; the rows of
            // "left" are emitted first, then those of "right".
            Profile<TSeq, TItem>[] sides = { left, right };
            for (int side = 0; side < sides.Length; side++)
            {
                // Fresh locals per iteration so the deferred projection captures stable values.
                int profileRow = side;
                Alignment<TSeq, TItem> sideAlignment = sides[side].Alignment;
                for (int i = 0; i < sideAlignment.SequenceCount; i++)
                {
                    int seq = i;
                    IEnumerable<AlignmentCell<TItem>> columns = Enumerable
                        .Range(0, profileAlignment.ColumnCount)
                        .Select(col => profileAlignment[profileRow, col].IsNull
                            ? new AlignmentCell<TItem>()
                            : profileAlignment[profileRow, col][0][seq]);
                    rows.Add(Tuple.Create(sideAlignment.Sequences[i], sideAlignment.Prefixes[i], columns,
                        sideAlignment.Suffixes[i]));
                }
            }

            var mergedAlignment = new Alignment<TSeq, TItem>(profileAlignment.RawScore,
                profileAlignment.NormalizedScore, rows);
            toMerge.Push(new Profile<TSeq, TItem>(mergedAlignment, left.Weights.Concat(right.Weights)));
        }

        profilesByNode[current] = toMerge.Pop();
    }

    Alignment<TSeq, TItem> alignment = profilesByNode[root].Alignment;
    if (!UseInputOrder)
    {
        _result = alignment;
        return;
    }

    // Re-emit the rows in the order of _sequences; each input sequence takes the first row
    // whose sequence it equals.
    var reordered = new List<Tuple<TSeq, AlignmentCell<TItem>, IEnumerable<AlignmentCell<TItem>>, AlignmentCell<TItem>>>();
    foreach (TSeq sequence in _sequences)
    {
        for (int i = 0; i < alignment.SequenceCount; i++)
        {
            int seq = i;
            if (!sequence.Equals(alignment.Sequences[seq]))
            {
                continue;
            }

            reordered.Add(Tuple.Create(sequence, alignment.Prefixes[seq],
                Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[seq, col]),
                alignment.Suffixes[seq]));
            break;
        }
    }
    _result = new Alignment<TSeq, TItem>(alignment.RawScore, alignment.NormalizedScore, reordered);
}

// True when the cluster node holds exactly one data object, i.e. has a profile of its own.
private static bool node_HasOwnSequence(Cluster<TSeq> node)
{
    return node.DataObjects.Count == 1;
}
/// <summary>
/// Clusters the project's varieties and converts the resulting cluster tree into a hierarchical graph.
/// </summary>
/// <param name="graphType">
/// Only consulted for neighbor joining: <c>Dendrogram</c> roots the tree before building the graph,
/// <c>Tree</c> builds the graph from the unrooted tree.
/// </param>
/// <param name="clusteringMethod">The clustering algorithm to run (UPGMA or neighbor joining).</param>
/// <param name="similarityMetric">
/// Selects which similarity score of a variety pair is used; the distance is one minus that score.
/// For any value other than <c>Lexical</c> or <c>Phonetic</c> the distance delegate stays
/// <c>null</c> and is passed to the clusterer as such.
/// </param>
/// <returns>
/// The hierarchical graph, or <c>null</c> when the clustering method is not handled, or when
/// neighbor joining is requested with a graph type other than <c>Dendrogram</c> or <c>Tree</c>.
/// </returns>
public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(
    HierarchicalGraphType graphType, ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
{
    // Both clustering methods use the same distance definition, so pick it once.
    Func<Variety, Variety, double> getDistance = null;
    if (similarityMetric == SimilarityMetric.Lexical)
    {
        getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].LexicalSimilarityScore;
    }
    else if (similarityMetric == SimilarityMetric.Phonetic)
    {
        getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].PhoneticSimilarityScore;
    }

    if (clusteringMethod == ClusteringMethod.Upgma)
    {
        var upgma = new UpgmaClusterer<Variety>(getDistance);
        IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> upgmaTree =
            upgma.GenerateClusters(_projectService.Project.Varieties);
        return BuildHierarchicalGraph(upgmaTree);
    }

    if (clusteringMethod == ClusteringMethod.NeighborJoining)
    {
        var nj = new NeighborJoiningClusterer<Variety>(getDistance);
        IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> njTree =
            nj.GenerateClusters(_projectService.Project.Varieties);

        if (graphType == HierarchicalGraphType.Dendrogram)
        {
            // A dendrogram needs a root, so the unrooted tree is rooted first.
            IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rootedTree = njTree.ToRootedTree();
            return BuildHierarchicalGraph(rootedTree);
        }
        if (graphType == HierarchicalGraphType.Tree)
        {
            return BuildHierarchicalGraph(njTree);
        }
    }

    return null;
}