/// <summary>
/// Clusters an empty sequence of data objects and checks that the resulting tree has no edges.
/// </summary>
public void ClusterNoDataObjects()
{
    // Arrange: a clusterer whose distance delegate returns 0 for every pair.
    var clusterer = new NeighborJoiningClusterer<char>((left, right) => 0);
    var noObjects = Enumerable.Empty<char>();

    // Act
    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> result = clusterer.GenerateClusters(noObjects);

    // Assert
    bool hasNoEdges = result.IsEdgesEmpty;
    Assert.That(hasNoEdges);
}
/// <summary>
/// Clusters a single data object and checks that the tree consists of exactly one vertex and no edges.
/// </summary>
public void ClusterOneDataObject()
{
    // Arrange: constant-zero distance delegate and a one-element input.
    var clusterer = new NeighborJoiningClusterer<char>((left, right) => 0);
    char[] singleObject = { 'A' };

    // Act
    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> result = clusterer.GenerateClusters(singleObject);

    // Assert
    Assert.That(result.VertexCount, Is.EqualTo(1));
    Assert.That(result.IsEdgesEmpty);
}
/// <summary>
/// Clusters five data objects ('A'..'E') using a fixed distance matrix and compares the
/// generated tree against a hand-built expected tree via <c>AssertTreeEqual</c>.
/// </summary>
public void Cluster()
{
    // Pairwise distances, indexed by (object - 'A').
    var distances = new double[,]
    {
        { 0, 1, 2, 3, 3 },
        { 1, 0, 2, 3, 3 },
        { 2, 2, 0, 3, 3 },
        { 3, 3, 3, 0, 1 },
        { 3, 3, 3, 1, 0 }
    };
    var clusterer = new NeighborJoiningClusterer<char>((first, second) => distances[first - 'A', second - 'A']);

    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> actual = clusterer.GenerateClusters(new[] { 'A', 'B', 'C', 'D', 'E' });

    // Expected vertices: one per data object plus three clusters created without a data object.
    var root = new Cluster<char> { Description = "root" };
    var leafA = new Cluster<char>('A') { Description = "A" };
    var leafB = new Cluster<char>('B') { Description = "B" };
    var leafC = new Cluster<char>('C') { Description = "C" };
    var leafD = new Cluster<char>('D') { Description = "D" };
    var leafE = new Cluster<char>('E') { Description = "E" };
    var clusterDE = new Cluster<char> { Description = "DE" };
    var clusterAB = new Cluster<char> { Description = "AB" };

    // Expected edges with their lengths.
    ClusterEdge<char>[] expectedEdges =
    {
        new ClusterEdge<char>(root, leafC, 1.0),
        new ClusterEdge<char>(root, clusterDE, 1.5),
        new ClusterEdge<char>(root, clusterAB, 0.5),
        new ClusterEdge<char>(clusterDE, leafD, 0.5),
        new ClusterEdge<char>(clusterDE, leafE, 0.5),
        new ClusterEdge<char>(clusterAB, leafA, 0.5),
        new ClusterEdge<char>(clusterAB, leafB, 0.5)
    };

    AssertTreeEqual(actual, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>(false));
}
/// <summary>
/// A lone data object must produce a tree with one vertex and an empty edge set.
/// </summary>
public void ClusterOneDataObject()
{
    var neighborJoining = new NeighborJoiningClusterer<char>((a, b) => 0);
    char[] input = { 'A' };

    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> generated = neighborJoining.GenerateClusters(input);

    // Exactly one vertex ...
    int vertexCount = generated.VertexCount;
    Assert.That(vertexCount, Is.EqualTo(1));

    // ... and nothing connecting it to anything else.
    Assert.That(generated.IsEdgesEmpty);
}
/// <summary>
/// Clusters two data objects at constant distance 1 and expects a tree made of a single
/// edge of length 1.0 joining the two objects.
/// </summary>
public void ClusterTwoDataObjects()
{
    // Arrange: every pair of objects is at distance 1.
    var clusterer = new NeighborJoiningClusterer<char>((left, right) => 1);

    // Act
    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> actual = clusterer.GenerateClusters(new[] { 'A', 'B' });

    // Assert: compare against the single expected edge A - B.
    var leafA = new Cluster<char>('A');
    var leafB = new Cluster<char>('B');
    ClusterEdge<char>[] expectedEdges = { new ClusterEdge<char>(leafA, leafB, 1.0) };
    AssertTreeEqual(actual, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>());
}
/// <summary>
/// With two data objects and a constant distance of 1, the generated tree is expected to
/// equal a graph holding one edge of length 1.0 between them.
/// </summary>
public void ClusterTwoDataObjects()
{
    var neighborJoining = new NeighborJoiningClusterer<char>((a, b) => 1);
    char[] input = { 'A', 'B' };

    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> generated = neighborJoining.GenerateClusters(input);

    // The only edge the expected tree contains.
    var onlyEdge = new ClusterEdge<char>(new Cluster<char>('A'), new Cluster<char>('B'), 1.0);
    var expectedEdges = new[] { onlyEdge };

    AssertTreeEqual(generated, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>());
}
/// <summary>
/// Runs the clusterer over 'A'..'E' with a fixed 5x5 distance matrix and checks the
/// generated tree against an explicitly constructed expected tree.
/// </summary>
public void Cluster()
{
    // Distance lookup table; row/column index is the data object's offset from 'A'.
    var distanceTable = new double[,]
    {
        { 0, 1, 2, 3, 3 },
        { 1, 0, 2, 3, 3 },
        { 2, 2, 0, 3, 3 },
        { 3, 3, 3, 0, 1 },
        { 3, 3, 3, 1, 0 }
    };
    var neighborJoining = new NeighborJoiningClusterer<char>((x, y) => distanceTable[x - 'A', y - 'A']);
    char[] input = { 'A', 'B', 'C', 'D', 'E' };

    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> generated = neighborJoining.GenerateClusters(input);

    // Vertices of the expected tree (created in the same order as the original lookup table).
    var rootNode = new Cluster<char> { Description = "root" };
    var nodeA = new Cluster<char>('A') { Description = "A" };
    var nodeB = new Cluster<char>('B') { Description = "B" };
    var nodeC = new Cluster<char>('C') { Description = "C" };
    var nodeD = new Cluster<char>('D') { Description = "D" };
    var nodeE = new Cluster<char>('E') { Description = "E" };
    var nodeDE = new Cluster<char> { Description = "DE" };
    var nodeAB = new Cluster<char> { Description = "AB" };

    // Edges of the expected tree: three leaving the root, then two under each inner cluster.
    var expectedEdges = new[]
    {
        new ClusterEdge<char>(rootNode, nodeC, 1.0),
        new ClusterEdge<char>(rootNode, nodeDE, 1.5),
        new ClusterEdge<char>(rootNode, nodeAB, 0.5),
        new ClusterEdge<char>(nodeDE, nodeD, 0.5),
        new ClusterEdge<char>(nodeDE, nodeE, 0.5),
        new ClusterEdge<char>(nodeAB, nodeA, 0.5),
        new ClusterEdge<char>(nodeAB, nodeB, 0.5)
    };

    AssertTreeEqual(generated, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>(false));
}
/// <summary>
/// Builds a multiple alignment of <c>_sequences</c> and stores it in <c>_result</c>.
/// </summary>
/// <remarks>
/// Steps, in the order the body performs them:
/// 1. A neighbor-joining clusterer is created whose distance between two sequences is
///    1.0 minus the normalized score of the first alignment returned by a pairwise
///    alignment of the two (built from <c>_scorer</c> and <c>_itemsSelector</c>).
/// 2. The unrooted tree generated from <c>_sequences</c> is converted to a rooted tree and
///    its first root is taken.
/// 3. If the root itself holds exactly one data object, a profile is created for that
///    object (second argument 0) and the running maximum weight starts at 0 instead of
///    <c>double.MinValue</c>.
/// 4. The root is pushed on the node stack and <c>CalcSequenceWeights</c> is called once
///    per out-edge of the root; the largest value it returns is kept as <c>maxWeight</c>.
///    NOTE(review): CalcSequenceWeights is not visible here; presumably it fills
///    <c>profiles</c> with per-sequence profiles and pushes further nodes on
///    <c>nodeStack</c> - confirm against its definition.
/// 5. <c>1.0 - maxWeight</c> is added to <c>Weights[0]</c> of every profile collected so far.
/// 6. Nodes are popped from the stack one at a time. For each node, the profiles of its
///    out-edge targets (plus its own profile when the node holds exactly one data object)
///    are moved from the dictionary onto a local stack and merged two at a time: both
///    profiles are aligned with a <c>ProfileScorer</c>-based pairwise alignment, every row
///    of both profiles is re-expressed over the columns of the first resulting alignment
///    (a column that is null for a profile contributes a default-constructed cell), and
///    the merged profile, carrying the concatenated weights, is pushed back. The last
///    remaining profile is stored under the node.
///    NOTE(review): if a popped node contributes no profiles, the final <c>Pop</c> on the
///    empty local stack would throw - confirm that cannot occur.
/// 7. The alignment of the root's profile is the result. When <c>UseInputOrder</c> is set,
///    its rows are rebuilt in the order of <c>_sequences</c> (each input sequence is matched
///    with <c>Equals</c>, first match wins); otherwise the alignment is stored unchanged.
/// </remarks>
public void Compute() { var clusterer = new NeighborJoiningClusterer <TSeq>((seq1, seq2) => { var pairwiseAligner = new PairwiseAlignmentAlgorithm <TSeq, TItem>(_scorer, seq1, seq2, _itemsSelector); pairwiseAligner.Compute(); return(1.0 - pairwiseAligner.GetAlignments().First().NormalizedScore); }); IUndirectedGraph <Cluster <TSeq>, ClusterEdge <TSeq> > unrootedTree = clusterer.GenerateClusters(_sequences); IBidirectionalGraph <Cluster <TSeq>, ClusterEdge <TSeq> > rootedTree = unrootedTree.ToRootedTree(); var profiles = new Dictionary <Cluster <TSeq>, Profile <TSeq, TItem> >(); var nodeStack = new Stack <Cluster <TSeq> >(); Cluster <TSeq> root = rootedTree.Roots().First(); double maxWeight = double.MinValue; if (root.DataObjects.Count == 1) { profiles[root] = CreateProfile(root.DataObjects.First(), 0); maxWeight = 0; } nodeStack.Push(root); foreach (ClusterEdge <TSeq> edge in rootedTree.OutEdges(root)) { maxWeight = Math.Max(maxWeight, CalcSequenceWeights(rootedTree, edge, 0, nodeStack, profiles)); } foreach (Profile <TSeq, TItem> profile in profiles.Values) { profile.Weights[0] += 1.0 - maxWeight; } var scorer = new ProfileScorer <TSeq, TItem>(_scorer); while (nodeStack.Count > 0) { Cluster <TSeq> node = nodeStack.Pop(); var curProfiles = new Stack <Profile <TSeq, TItem> >(); foreach (ClusterEdge <TSeq> childEdge in rootedTree.OutEdges(node)) { curProfiles.Push(profiles[childEdge.Target]); profiles.Remove(childEdge.Target); } if (node.DataObjects.Count == 1) { curProfiles.Push(profiles[node]); profiles.Remove(node); } while (curProfiles.Count > 1) { Profile <TSeq, TItem> profile1 = curProfiles.Pop(); Profile <TSeq, TItem> profile2 = curProfiles.Pop(); var profileAligner = new PairwiseAlignmentAlgorithm <Profile <TSeq, TItem>, AlignmentCell <TItem>[]>(scorer, profile1, profile2, GetProfileItems); profileAligner.Compute(); Alignment <Profile <TSeq, TItem>, AlignmentCell <TItem>[]> profileAlignment = profileAligner.GetAlignments().First(); var sequences = new List 
<Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >(); for (int i = 0; i < profile1.Alignment.SequenceCount; i++) { int seq = i; sequences.Add(Tuple.Create(profile1.Alignment.Sequences[i], profile1.Alignment.Prefixes[i], Enumerable.Range(0, profileAlignment.ColumnCount) .Select(col => profileAlignment[0, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[0, col][0][seq]), profile1.Alignment.Suffixes[i])); } for (int j = 0; j < profile2.Alignment.SequenceCount; j++) { int seq = j; sequences.Add(Tuple.Create(profile2.Alignment.Sequences[j], profile2.Alignment.Prefixes[j], Enumerable.Range(0, profileAlignment.ColumnCount) .Select(col => profileAlignment[1, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[1, col][0][seq]), profile2.Alignment.Suffixes[j])); } var newAlignment = new Alignment <TSeq, TItem>(profileAlignment.RawScore, profileAlignment.NormalizedScore, sequences); curProfiles.Push(new Profile <TSeq, TItem>(newAlignment, profile1.Weights.Concat(profile2.Weights))); } profiles[node] = curProfiles.Pop(); } Alignment <TSeq, TItem> alignment = profiles[root].Alignment; if (UseInputOrder) { var reorderedSequences = new List <Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >(); foreach (TSeq sequence in _sequences) { for (int i = 0; i < alignment.SequenceCount; i++) { int seq = i; if (sequence.Equals(alignment.Sequences[seq])) { reorderedSequences.Add(Tuple.Create(sequence, alignment.Prefixes[seq], Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[seq, col]), alignment.Suffixes[seq])); break; } } } _result = new Alignment <TSeq, TItem>(alignment.RawScore, alignment.NormalizedScore, reorderedSequences); } else { _result = alignment; } }
/// <summary>
/// Clusters the project's varieties with the requested method and converts the resulting
/// tree into a hierarchical graph via <c>BuildHierarchicalGraph</c>.
/// </summary>
/// <param name="graphType">
/// Only consulted for neighbor joining: <c>Dendrogram</c> roots the tree before building the
/// graph, <c>Tree</c> builds the graph from the unrooted tree.
/// </param>
/// <param name="clusteringMethod">The clustering algorithm to run (UPGMA or neighbor joining).</param>
/// <param name="similarityMetric">
/// Selects which similarity score of a variety pair is turned into a distance (1.0 - score).
/// </param>
/// <returns>
/// The hierarchical graph, or <c>null</c> when the clustering method is not handled here or,
/// for neighbor joining, the graph type is neither <c>Dendrogram</c> nor <c>Tree</c>.
/// </returns>
public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
    ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
{
    // Both clustering methods use the same distance definition; it stays null for a
    // metric other than Lexical/Phonetic, exactly as when it was selected per method.
    Func<Variety, Variety, double> distance = null;
    if (similarityMetric == SimilarityMetric.Lexical)
    {
        distance = (a, b) => 1.0 - a.VarietyPairs[b].LexicalSimilarityScore;
    }
    else if (similarityMetric == SimilarityMetric.Phonetic)
    {
        distance = (a, b) => 1.0 - a.VarietyPairs[b].PhoneticSimilarityScore;
    }

    if (clusteringMethod == ClusteringMethod.Upgma)
    {
        var upgmaClusterer = new UpgmaClusterer<Variety>(distance);
        IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> upgmaResult = upgmaClusterer.GenerateClusters(_projectService.Project.Varieties);
        return BuildHierarchicalGraph(upgmaResult);
    }

    if (clusteringMethod == ClusteringMethod.NeighborJoining)
    {
        // The clustering runs before the graph type is inspected, as in the original flow.
        var njClusterer = new NeighborJoiningClusterer<Variety>(distance);
        IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> unrooted = njClusterer.GenerateClusters(_projectService.Project.Varieties);

        if (graphType == HierarchicalGraphType.Dendrogram)
        {
            IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rooted = unrooted.ToRootedTree();
            return BuildHierarchicalGraph(rooted);
        }

        if (graphType == HierarchicalGraphType.Tree)
        {
            return BuildHierarchicalGraph(unrooted);
        }
    }

    return null;
}
/// <summary>
/// Runs the chosen clustering algorithm over the project's varieties and hands the
/// resulting tree to <c>BuildHierarchicalGraph</c>.
/// </summary>
/// <param name="graphType">
/// Used only by the neighbor-joining branch: <c>Dendrogram</c> converts the unrooted tree to a
/// rooted tree first, <c>Tree</c> uses the unrooted tree directly.
/// </param>
/// <param name="clusteringMethod">Which clusterer to use (UPGMA or neighbor joining).</param>
/// <param name="similarityMetric">
/// Which similarity score (lexical or phonetic) of a variety pair the distance 1.0 - score is based on.
/// </param>
/// <returns>
/// The built hierarchical graph; <c>null</c> if no branch matches the given clustering
/// method / graph type combination.
/// </returns>
public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
    ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
{
    // Pick the distance delegate once; an unrecognized metric leaves it null.
    Func<Variety, Variety, double> getDistance = null;
    switch (similarityMetric)
    {
        case SimilarityMetric.Lexical:
            getDistance = (first, second) => 1.0 - first.VarietyPairs[second].LexicalSimilarityScore;
            break;
        case SimilarityMetric.Phonetic:
            getDistance = (first, second) => 1.0 - first.VarietyPairs[second].PhoneticSimilarityScore;
            break;
    }

    // UPGMA yields a bidirectional tree that can be converted directly.
    if (clusteringMethod == ClusteringMethod.Upgma)
    {
        var upgma = new UpgmaClusterer<Variety>(getDistance);
        IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> tree = upgma.GenerateClusters(_projectService.Project.Varieties);
        return BuildHierarchicalGraph(tree);
    }

    // Anything other than neighbor joining is not handled.
    if (clusteringMethod != ClusteringMethod.NeighborJoining)
    {
        return null;
    }

    var neighborJoining = new NeighborJoiningClusterer<Variety>(getDistance);
    IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> unrootedTree = neighborJoining.GenerateClusters(_projectService.Project.Varieties);

    switch (graphType)
    {
        case HierarchicalGraphType.Dendrogram:
            IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rootedTree = unrootedTree.ToRootedTree();
            return BuildHierarchicalGraph(rootedTree);
        case HierarchicalGraphType.Tree:
            return BuildHierarchicalGraph(unrootedTree);
        default:
            return null;
    }
}
/// <summary>
/// Generating clusters from an empty input must return a tree whose edge set is empty.
/// </summary>
public void ClusterNoDataObjects()
{
    var neighborJoining = new NeighborJoiningClusterer<char>((a, b) => 0);

    // Feed the clusterer an empty sequence.
    IUndirectedGraph<Cluster<char>, ClusterEdge<char>> generated =
        neighborJoining.GenerateClusters(Enumerable.Empty<char>());

    Assert.That(generated.IsEdgesEmpty);
}