Пример #1
0
        /// <summary>
        /// Clustering an empty input sequence must produce a tree without any edges.
        /// </summary>
        public void ClusterNoDataObjects()
        {
            // The distance function is irrelevant here; it always reports zero.
            var clusterer = new NeighborJoiningClusterer<char>((first, second) => 0);
            var noData = Enumerable.Empty<char>();

            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> result = clusterer.GenerateClusters(noData);

            Assert.That(result.IsEdgesEmpty, Is.True);
        }
 /// <summary>
 /// Clustering a single data object must produce a tree with exactly one vertex and no edges.
 /// </summary>
 public void ClusterOneDataObject()
 {
     char[] singleItem = {'A'};
     var clusterer = new NeighborJoiningClusterer<char>((first, second) => 0);

     IUndirectedGraph<Cluster<char>, ClusterEdge<char>> result = clusterer.GenerateClusters(singleItem);

     Assert.That(result.VertexCount, Is.EqualTo(1));
     Assert.That(result.IsEdgesEmpty, Is.True);
 }
Пример #3
0
        /// <summary>
        /// Clusters five data objects ('A'..'E') using a fixed symmetric distance matrix and
        /// compares the resulting tree against a hand-built expected tree.
        /// </summary>
        public void Cluster()
        {
            // Row/column i corresponds to the character 'A' + i.
            var distances = new double[,]
            {
                { 0, 1, 2, 3, 3 },
                { 1, 0, 2, 3, 3 },
                { 2, 2, 0, 3, 3 },
                { 3, 3, 3, 0, 1 },
                { 3, 3, 3, 1, 0 }
            };
            char[] dataObjects = { 'A', 'B', 'C', 'D', 'E' };

            var clusterer = new NeighborJoiningClusterer<char>((x, y) => distances[x - 'A', y - 'A']);
            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> actual = clusterer.GenerateClusters(dataObjects);

            // Expected vertices, keyed by description: three internal nodes plus one leaf per data object.
            var expectedVertices = new Dictionary<string, Cluster<char>>();
            foreach (string internalName in new[] { "root", "DE", "AB" })
            {
                expectedVertices[internalName] = new Cluster<char> { Description = internalName };
            }
            foreach (char leaf in dataObjects)
            {
                string leafName = leaf.ToString();
                expectedVertices[leafName] = new Cluster<char>(leaf) { Description = leafName };
            }

            Cluster<char> root = expectedVertices["root"];
            Cluster<char> de = expectedVertices["DE"];
            Cluster<char> ab = expectedVertices["AB"];

            var expectedEdges = new[]
            {
                new ClusterEdge<char>(root, expectedVertices["C"], 1.0),
                new ClusterEdge<char>(root, de, 1.5),
                new ClusterEdge<char>(root, ab, 0.5),
                new ClusterEdge<char>(de, expectedVertices["D"], 0.5),
                new ClusterEdge<char>(de, expectedVertices["E"], 0.5),
                new ClusterEdge<char>(ab, expectedVertices["A"], 0.5),
                new ClusterEdge<char>(ab, expectedVertices["B"], 0.5)
            };

            AssertTreeEqual(actual, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>(false));
        }
Пример #4
0
        /// <summary>
        /// A lone data object yields a tree consisting of a single vertex and no edges.
        /// </summary>
        public void ClusterOneDataObject()
        {
            var clusterer = new NeighborJoiningClusterer<char>((left, right) => 0);
            char[] input = { 'A' };

            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> clustered = clusterer.GenerateClusters(input);

            Assert.That(clustered.VertexCount, Is.EqualTo(1));
            Assert.That(clustered.IsEdgesEmpty, Is.True);
        }
Пример #5
0
        /// <summary>
        /// Two data objects at distance 1 must be joined by a single edge of length 1.0.
        /// </summary>
        public void ClusterTwoDataObjects()
        {
            var clusterer = new NeighborJoiningClusterer<char>((x, y) => 1);
            char[] dataObjects = { 'A', 'B' };
            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> actual = clusterer.GenerateClusters(dataObjects);

            // Expected tree: one edge connecting the two leaf clusters.
            var leafA = new Cluster<char>('A');
            var leafB = new Cluster<char>('B');
            var expectedEdges = new[] { new ClusterEdge<char>(leafA, leafB, 1.0) };

            AssertTreeEqual(actual, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>());
        }
        /// <summary>
        /// With a constant distance of 1 between two data objects, the resulting tree
        /// is expected to be a single A-B edge of length 1.0.
        /// </summary>
        public void ClusterTwoDataObjects()
        {
            var expectedEdge = new ClusterEdge<char>(new Cluster<char>('A'), new Cluster<char>('B'), 1.0);
            var expectedEdges = new[] {expectedEdge};

            var clusterer = new NeighborJoiningClusterer<char>((first, second) => 1);
            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> result = clusterer.GenerateClusters(new[] {'A', 'B'});

            AssertTreeEqual(result, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>());
        }
        /// <summary>
        /// Runs neighbor joining over 'A'..'E' with a fixed distance matrix and checks the
        /// produced tree against the expected topology and edge lengths.
        /// </summary>
        public void Cluster()
        {
            // Symmetric distances; index 0 is 'A', index 4 is 'E'.
            var distanceTable = new double[,]
                {
                    {0, 1, 2, 3, 3},
                    {1, 0, 2, 3, 3},
                    {2, 2, 0, 3, 3},
                    {3, 3, 3, 0, 1},
                    {3, 3, 3, 1, 0}
                };
            var clusterer = new NeighborJoiningClusterer<char>((x, y) => distanceTable[x - 'A', y - 'A']);
            IUndirectedGraph<Cluster<char>, ClusterEdge<char>> actualTree = clusterer.GenerateClusters(new[] {'A', 'B', 'C', 'D', 'E'});

            // Internal nodes of the expected tree.
            var root = new Cluster<char> {Description = "root"};
            var nodeDE = new Cluster<char> {Description = "DE"};
            var nodeAB = new Cluster<char> {Description = "AB"};

            // Leaves of the expected tree, one per data object.
            var leafA = new Cluster<char>('A') {Description = "A"};
            var leafB = new Cluster<char>('B') {Description = "B"};
            var leafC = new Cluster<char>('C') {Description = "C"};
            var leafD = new Cluster<char>('D') {Description = "D"};
            var leafE = new Cluster<char>('E') {Description = "E"};

            var expectedEdges = new[]
                {
                    new ClusterEdge<char>(root, leafC, 1.0),
                    new ClusterEdge<char>(root, nodeDE, 1.5),
                    new ClusterEdge<char>(root, nodeAB, 0.5),
                    new ClusterEdge<char>(nodeDE, leafD, 0.5),
                    new ClusterEdge<char>(nodeDE, leafE, 0.5),
                    new ClusterEdge<char>(nodeAB, leafA, 0.5),
                    new ClusterEdge<char>(nodeAB, leafB, 0.5)
                };

            AssertTreeEqual(actualTree, expectedEdges.ToUndirectedGraph<Cluster<char>, ClusterEdge<char>>(false));
        }
Пример #8
0
        /// <summary>
        /// Computes a multiple alignment of <c>_sequences</c> and stores it in <c>_result</c>.
        /// A neighbor-joining tree is built from pairwise alignment distances, rooted, and then
        /// walked node by node, merging the profiles below each node with profile-to-profile
        /// pairwise alignments until a single profile remains for the root.
        /// When <c>UseInputOrder</c> is set, the rows of the final alignment are rearranged to
        /// follow the order of <c>_sequences</c>.
        /// </summary>
        public void Compute()
        {
            // Distance between two sequences = 1 - normalized score of their first pairwise alignment.
            var clusterer = new NeighborJoiningClusterer <TSeq>((seq1, seq2) =>
            {
                var pairwiseAligner = new PairwiseAlignmentAlgorithm <TSeq, TItem>(_scorer, seq1, seq2, _itemsSelector);
                pairwiseAligner.Compute();
                return(1.0 - pairwiseAligner.GetAlignments().First().NormalizedScore);
            });
            IUndirectedGraph <Cluster <TSeq>, ClusterEdge <TSeq> >    unrootedTree = clusterer.GenerateClusters(_sequences);
            IBidirectionalGraph <Cluster <TSeq>, ClusterEdge <TSeq> > rootedTree   = unrootedTree.ToRootedTree();

            var            profiles  = new Dictionary <Cluster <TSeq>, Profile <TSeq, TItem> >();
            var            nodeStack = new Stack <Cluster <TSeq> >();
            Cluster <TSeq> root      = rootedTree.Roots().First();
            double         maxWeight = double.MinValue;

            // A root that directly holds exactly one data object gets its own profile.
            if (root.DataObjects.Count == 1)
            {
                profiles[root] = CreateProfile(root.DataObjects.First(), 0);
                maxWeight      = 0;
            }
            nodeStack.Push(root);
            // NOTE(review): CalcSequenceWeights is defined elsewhere; since nothing else in this
            // method adds child entries to `profiles` or further nodes to `nodeStack`, it
            // presumably populates both while descending each subtree — confirm.
            // Its return value is folded into the running maximum.
            foreach (ClusterEdge <TSeq> edge in rootedTree.OutEdges(root))
            {
                maxWeight = Math.Max(maxWeight, CalcSequenceWeights(rootedTree, edge, 0, nodeStack, profiles));
            }

            // Shift the first weight of every profile by (1 - maxWeight).
            foreach (Profile <TSeq, TItem> profile in profiles.Values)
            {
                profile.Weights[0] += 1.0 - maxWeight;
            }

            var scorer = new ProfileScorer <TSeq, TItem>(_scorer);

            // Process nodes in stack order; each iteration replaces a node's child profiles
            // (and its own, if any) with one merged profile stored under the node itself.
            while (nodeStack.Count > 0)
            {
                Cluster <TSeq> node = nodeStack.Pop();

                // Gather the profiles of all children, removing them from the dictionary.
                var curProfiles = new Stack <Profile <TSeq, TItem> >();
                foreach (ClusterEdge <TSeq> childEdge in rootedTree.OutEdges(node))
                {
                    curProfiles.Push(profiles[childEdge.Target]);
                    profiles.Remove(childEdge.Target);
                }
                // A node holding exactly one data object also contributes its own profile.
                if (node.DataObjects.Count == 1)
                {
                    curProfiles.Push(profiles[node]);
                    profiles.Remove(node);
                }
                // Repeatedly align the top two profiles and push the merged result back
                // until only one profile is left.
                while (curProfiles.Count > 1)
                {
                    Profile <TSeq, TItem> profile1 = curProfiles.Pop();
                    Profile <TSeq, TItem> profile2 = curProfiles.Pop();
                    var profileAligner             = new PairwiseAlignmentAlgorithm <Profile <TSeq, TItem>, AlignmentCell <TItem>[]>(scorer, profile1, profile2, GetProfileItems);
                    profileAligner.Compute();
                    Alignment <Profile <TSeq, TItem>, AlignmentCell <TItem>[]> profileAlignment = profileAligner.GetAlignments().First();
                    var sequences = new List <Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >();
                    // Rows coming from profile1: read from row 0 of the profile alignment.
                    for (int i = 0; i < profile1.Alignment.SequenceCount; i++)
                    {
                        // Copy the loop variable: the Select below is lazy and must capture this iteration's index.
                        int seq = i;
                        // A null profile cell becomes an empty AlignmentCell; otherwise take this
                        // sequence's entry from the first item of the cell.
                        sequences.Add(Tuple.Create(profile1.Alignment.Sequences[i], profile1.Alignment.Prefixes[i], Enumerable.Range(0, profileAlignment.ColumnCount)
                                                   .Select(col => profileAlignment[0, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[0, col][0][seq]), profile1.Alignment.Suffixes[i]));
                    }
                    // Rows coming from profile2: same construction, read from row 1.
                    for (int j = 0; j < profile2.Alignment.SequenceCount; j++)
                    {
                        // Per-iteration copy for the lazy lambda, as above.
                        int seq = j;
                        sequences.Add(Tuple.Create(profile2.Alignment.Sequences[j], profile2.Alignment.Prefixes[j], Enumerable.Range(0, profileAlignment.ColumnCount)
                                                   .Select(col => profileAlignment[1, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[1, col][0][seq]), profile2.Alignment.Suffixes[j]));
                    }
                    // The merged alignment carries the scores of the profile alignment; weights are
                    // concatenated in the same profile1-then-profile2 order as the rows.
                    var newAlignment = new Alignment <TSeq, TItem>(profileAlignment.RawScore, profileAlignment.NormalizedScore, sequences);
                    curProfiles.Push(new Profile <TSeq, TItem>(newAlignment, profile1.Weights.Concat(profile2.Weights)));
                }
                // NOTE(review): Pop throws if this node contributed no profiles at all;
                // presumably that cannot happen for nodes placed on the stack — confirm.
                profiles[node] = curProfiles.Pop();
            }

            Alignment <TSeq, TItem> alignment = profiles[root].Alignment;

            if (UseInputOrder)
            {
                // Rebuild the alignment with rows in the order of _sequences. For each input
                // sequence the first row whose sequence Equals it is used.
                var reorderedSequences = new List <Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >();
                foreach (TSeq sequence in _sequences)
                {
                    for (int i = 0; i < alignment.SequenceCount; i++)
                    {
                        // Per-iteration copy so the lazy Select captures the matched row index.
                        int seq = i;
                        if (sequence.Equals(alignment.Sequences[seq]))
                        {
                            reorderedSequences.Add(Tuple.Create(sequence, alignment.Prefixes[seq], Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[seq, col]), alignment.Suffixes[seq]));
                            break;
                        }
                    }
                }

                _result = new Alignment <TSeq, TItem>(alignment.RawScore, alignment.NormalizedScore, reorderedSequences);
            }
            else
            {
                // Keep the row order produced by the tree traversal.
                _result = alignment;
            }
        }
Пример #9
0
        /// <summary>
        /// Clusters the project's varieties with the requested method and converts the resulting
        /// cluster tree into a hierarchical graph.
        /// </summary>
        /// <param name="graphType">For neighbor joining, selects a rooted dendrogram or the unrooted tree; not consulted for UPGMA.</param>
        /// <param name="clusteringMethod">The clustering algorithm to run.</param>
        /// <param name="similarityMetric">Which similarity score of a variety pair the distance is derived from.</param>
        /// <returns>The hierarchical graph, or <c>null</c> when the clustering method or graph type matches none of the handled values.</returns>
        public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
            ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            // Distance is one minus the selected similarity score; it stays null for any other metric.
            Func<Variety, Variety, double> getDistance = null;
            if (similarityMetric == SimilarityMetric.Lexical)
            {
                getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].LexicalSimilarityScore;
            }
            else if (similarityMetric == SimilarityMetric.Phonetic)
            {
                getDistance = (v1, v2) => 1.0 - v1.VarietyPairs[v2].PhoneticSimilarityScore;
            }

            if (clusteringMethod == ClusteringMethod.Upgma)
            {
                var upgma = new UpgmaClusterer<Variety>(getDistance);
                IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> upgmaTree = upgma.GenerateClusters(_projectService.Project.Varieties);
                return BuildHierarchicalGraph(upgmaTree);
            }

            if (clusteringMethod == ClusteringMethod.NeighborJoining)
            {
                var nj = new NeighborJoiningClusterer<Variety>(getDistance);
                IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> njTree = nj.GenerateClusters(_projectService.Project.Varieties);

                if (graphType == HierarchicalGraphType.Dendrogram)
                {
                    // A dendrogram needs a rooted tree, so root the unrooted result first.
                    IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rootedTree = njTree.ToRootedTree();
                    return BuildHierarchicalGraph(rootedTree);
                }

                if (graphType == HierarchicalGraphType.Tree)
                {
                    return BuildHierarchicalGraph(njTree);
                }
            }

            return null;
        }
Пример #10
0
        /// <summary>
        /// Builds a hierarchical graph of the project's varieties by running the chosen clustering
        /// algorithm over distances derived from the chosen similarity metric.
        /// </summary>
        /// <param name="graphType">Only used for neighbor joining: dendrogram (rooted) or tree (unrooted).</param>
        /// <param name="clusteringMethod">UPGMA or neighbor joining.</param>
        /// <param name="similarityMetric">Lexical or phonetic similarity of a variety pair.</param>
        /// <returns>The generated graph, or <c>null</c> if no handled clustering method / graph type combination applies.</returns>
        public IBidirectionalGraph<HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
            ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            // Both clustering methods use the same distance: 1 - similarity score of the pair.
            Func<Variety, Variety, double> distance = null;
            switch (similarityMetric)
            {
                case SimilarityMetric.Lexical:
                    distance = (a, b) => 1.0 - a.VarietyPairs[b].LexicalSimilarityScore;
                    break;

                case SimilarityMetric.Phonetic:
                    distance = (a, b) => 1.0 - a.VarietyPairs[b].PhoneticSimilarityScore;
                    break;
            }

            switch (clusteringMethod)
            {
                case ClusteringMethod.Upgma:
                {
                    var upgmaClusterer = new UpgmaClusterer<Variety>(distance);
                    IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> upgmaTree = upgmaClusterer.GenerateClusters(_projectService.Project.Varieties);
                    return BuildHierarchicalGraph(upgmaTree);
                }

                case ClusteringMethod.NeighborJoining:
                {
                    var njClusterer = new NeighborJoiningClusterer<Variety>(distance);
                    IUndirectedGraph<Cluster<Variety>, ClusterEdge<Variety>> unrootedTree = njClusterer.GenerateClusters(_projectService.Project.Varieties);

                    if (graphType == HierarchicalGraphType.Dendrogram)
                    {
                        // Root the neighbor-joining tree before building a dendrogram from it.
                        IBidirectionalGraph<Cluster<Variety>, ClusterEdge<Variety>> rootedTree = unrootedTree.ToRootedTree();
                        return BuildHierarchicalGraph(rootedTree);
                    }
                    if (graphType == HierarchicalGraphType.Tree)
                    {
                        return BuildHierarchicalGraph(unrootedTree);
                    }
                    return null;
                }

                default:
                    return null;
            }
        }
 /// <summary>
 /// With no data objects to cluster, the generated tree must contain no edges.
 /// </summary>
 public void ClusterNoDataObjects()
 {
     var emptyInput = Enumerable.Empty<char>();
     var clusterer = new NeighborJoiningClusterer<char>((left, right) => 0);

     IUndirectedGraph<Cluster<char>, ClusterEdge<char>> clustered = clusterer.GenerateClusters(emptyInput);

     Assert.That(clustered.IsEdgesEmpty, Is.True);
 }