Exemplo n.º 1
0
        /// <summary>
        /// Builds a multiple alignment of <c>_sequences</c> and stores it in <c>_result</c>.
        /// The sequences are clustered with neighbor joining (distance = 1 - normalized pairwise
        /// alignment score), the resulting tree is rooted, and the per-node profiles are then
        /// merged pairwise, walking the tree via a node stack, until a single profile remains at the root.
        /// When <c>UseInputOrder</c> is set, the rows of the final alignment are rearranged to
        /// follow the order of <c>_sequences</c>.
        /// </summary>
        public void Compute()
        {
            // Distance between two sequences: align them pairwise and take 1 - normalized score
            // of the first alignment returned.
            var clusterer = new NeighborJoiningClusterer <TSeq>((seq1, seq2) =>
            {
                var pairwiseAligner = new PairwiseAlignmentAlgorithm <TSeq, TItem>(_scorer, seq1, seq2, _itemsSelector);
                pairwiseAligner.Compute();
                return(1.0 - pairwiseAligner.GetAlignments().First().NormalizedScore);
            });
            // Neighbor joining yields an unrooted tree; it is converted to a rooted one so that
            // there is a root and out-edges to traverse.
            IUndirectedGraph <Cluster <TSeq>, ClusterEdge <TSeq> >    unrootedTree = clusterer.GenerateClusters(_sequences);
            IBidirectionalGraph <Cluster <TSeq>, ClusterEdge <TSeq> > rootedTree   = unrootedTree.ToRootedTree();

            // profiles: current profile per tree node; nodeStack: nodes still to be merged.
            var            profiles  = new Dictionary <Cluster <TSeq>, Profile <TSeq, TItem> >();
            var            nodeStack = new Stack <Cluster <TSeq> >();
            Cluster <TSeq> root      = rootedTree.Roots().First();
            double         maxWeight = double.MinValue;

            // A root that holds exactly one data object gets its own profile (created with 0 as
            // the second argument) and contributes 0 to the maximum weight.
            if (root.DataObjects.Count == 1)
            {
                profiles[root] = CreateProfile(root.DataObjects.First(), 0);
                maxWeight      = 0;
            }
            nodeStack.Push(root);
            // NOTE(review): CalcSequenceWeights is not visible here; it receives nodeStack and
            // profiles, so it presumably walks the subtree, pushes the nodes to process and creates
            // the leaf profiles, returning a weight for the subtree - confirm against its definition.
            foreach (ClusterEdge <TSeq> edge in rootedTree.OutEdges(root))
            {
                maxWeight = Math.Max(maxWeight, CalcSequenceWeights(rootedTree, edge, 0, nodeStack, profiles));
            }

            // Shift the first weight of every profile by (1 - maxWeight).
            foreach (Profile <TSeq, TItem> profile in profiles.Values)
            {
                profile.Weights[0] += 1.0 - maxWeight;
            }

            var scorer = new ProfileScorer <TSeq, TItem>(_scorer);

            // Process the stacked nodes: for each node, collect the profiles of its children
            // (and its own, if it holds a single data object), then merge them into one profile.
            while (nodeStack.Count > 0)
            {
                Cluster <TSeq> node = nodeStack.Pop();

                var curProfiles = new Stack <Profile <TSeq, TItem> >();
                foreach (ClusterEdge <TSeq> childEdge in rootedTree.OutEdges(node))
                {
                    // The child's profile is consumed here, so it is removed from the dictionary.
                    curProfiles.Push(profiles[childEdge.Target]);
                    profiles.Remove(childEdge.Target);
                }
                if (node.DataObjects.Count == 1)
                {
                    curProfiles.Push(profiles[node]);
                    profiles.Remove(node);
                }
                // Merge the two topmost profiles repeatedly until only one is left.
                while (curProfiles.Count > 1)
                {
                    Profile <TSeq, TItem> profile1 = curProfiles.Pop();
                    Profile <TSeq, TItem> profile2 = curProfiles.Pop();
                    var profileAligner             = new PairwiseAlignmentAlgorithm <Profile <TSeq, TItem>, AlignmentCell <TItem>[]>(scorer, profile1, profile2, GetProfileItems);
                    profileAligner.Compute();
                    Alignment <Profile <TSeq, TItem>, AlignmentCell <TItem>[]> profileAlignment = profileAligner.GetAlignments().First();
                    // Rows of the merged alignment: every sequence of profile1, then every sequence of profile2.
                    var sequences = new List <Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >();
                    for (int i = 0; i < profile1.Alignment.SequenceCount; i++)
                    {
                        // Copy of the loop index for the lambda below; the Select is evaluated lazily,
                        // so it must not observe later values of i.
                        int seq = i;
                        // Row 0 of the profile alignment corresponds to profile1: a null cell becomes
                        // an empty cell, otherwise this sequence's cell is taken from the column.
                        sequences.Add(Tuple.Create(profile1.Alignment.Sequences[i], profile1.Alignment.Prefixes[i], Enumerable.Range(0, profileAlignment.ColumnCount)
                                                   .Select(col => profileAlignment[0, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[0, col][0][seq]), profile1.Alignment.Suffixes[i]));
                    }
                    for (int j = 0; j < profile2.Alignment.SequenceCount; j++)
                    {
                        // Same as above, for row 1 (profile2).
                        int seq = j;
                        sequences.Add(Tuple.Create(profile2.Alignment.Sequences[j], profile2.Alignment.Prefixes[j], Enumerable.Range(0, profileAlignment.ColumnCount)
                                                   .Select(col => profileAlignment[1, col].IsNull ? new AlignmentCell <TItem>() : profileAlignment[1, col][0][seq]), profile2.Alignment.Suffixes[j]));
                    }
                    // The merged profile carries the scores of the profile alignment and the
                    // weights of both inputs, in the same order as the rows.
                    var newAlignment = new Alignment <TSeq, TItem>(profileAlignment.RawScore, profileAlignment.NormalizedScore, sequences);
                    curProfiles.Push(new Profile <TSeq, TItem>(newAlignment, profile1.Weights.Concat(profile2.Weights)));
                }
                // NOTE(review): Pop throws if no profile was collected for this node; presumably
                // every stacked node has at least one child profile or its own - confirm.
                profiles[node] = curProfiles.Pop();
            }

            Alignment <TSeq, TItem> alignment = profiles[root].Alignment;

            if (UseInputOrder)
            {
                // Rebuild the alignment with rows ordered like _sequences. Each input sequence is
                // matched to the first alignment row that Equals it; unmatched sequences are skipped.
                var reorderedSequences = new List <Tuple <TSeq, AlignmentCell <TItem>, IEnumerable <AlignmentCell <TItem> >, AlignmentCell <TItem> > >();
                foreach (TSeq sequence in _sequences)
                {
                    for (int i = 0; i < alignment.SequenceCount; i++)
                    {
                        // Copy of the loop index captured by the lazily evaluated Select below.
                        int seq = i;
                        if (sequence.Equals(alignment.Sequences[seq]))
                        {
                            reorderedSequences.Add(Tuple.Create(sequence, alignment.Prefixes[seq], Enumerable.Range(0, alignment.ColumnCount).Select(col => alignment[seq, col]), alignment.Suffixes[seq]));
                            break;
                        }
                    }
                }

                _result = new Alignment <TSeq, TItem>(alignment.RawScore, alignment.NormalizedScore, reorderedSequences);
            }
            else
            {
                // Keep the row order produced by the tree traversal.
                _result = alignment;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Clusters the project's varieties with the requested method and converts the resulting
        /// cluster tree into a hierarchical graph.
        /// </summary>
        /// <param name="graphType">
        /// Shape of the graph to build. Only consulted for neighbor joining: a dendrogram is built
        /// from the rooted tree, a tree from the unrooted one. UPGMA ignores this value.
        /// </param>
        /// <param name="clusteringMethod">Clustering algorithm to run.</param>
        /// <param name="similarityMetric">Similarity score used to derive the distance (1 - similarity) between two varieties.</param>
        /// <returns>
        /// The hierarchical graph, or <c>null</c> when the clustering method (or, for neighbor
        /// joining, the graph type) is not one of the supported values.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="similarityMetric"/> is not a supported metric and a supported clustering method was requested.
        /// </exception>
        public IBidirectionalGraph <HierarchicalGraphVertex, HierarchicalGraphEdge> GenerateHierarchicalGraph(HierarchicalGraphType graphType,
                                                                                                              ClusteringMethod clusteringMethod, SimilarityMetric similarityMetric)
        {
            switch (clusteringMethod)
            {
            case ClusteringMethod.Upgma:
                var upgma = new UpgmaClusterer <Variety>(GetSimilarityDistanceFunction(similarityMetric));
                IBidirectionalGraph <Cluster <Variety>, ClusterEdge <Variety> > upgmaTree = upgma.GenerateClusters(_projectService.Project.Varieties);
                return(BuildHierarchicalGraph(upgmaTree));

            case ClusteringMethod.NeighborJoining:
                var nj = new NeighborJoiningClusterer <Variety>(GetSimilarityDistanceFunction(similarityMetric));
                IUndirectedGraph <Cluster <Variety>, ClusterEdge <Variety> > njTree = nj.GenerateClusters(_projectService.Project.Varieties);
                switch (graphType)
                {
                case HierarchicalGraphType.Dendrogram:
                    // A dendrogram needs a root, so the unrooted neighbor-joining tree is rooted first.
                    IBidirectionalGraph <Cluster <Variety>, ClusterEdge <Variety> > rootedTree = njTree.ToRootedTree();
                    return(BuildHierarchicalGraph(rootedTree));

                case HierarchicalGraphType.Tree:
                    return(BuildHierarchicalGraph(njTree));
                }
                break;
            }

            // Unsupported clustering method / graph type combination: kept as a null result so
            // existing callers that check for null continue to work.
            return(null);
        }

        /// <summary>
        /// Maps a similarity metric to a distance function over two varieties, defined as
        /// 1 minus the corresponding similarity score of the variety pair.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">The metric is not supported.</exception>
        private static Func <Variety, Variety, double> GetSimilarityDistanceFunction(SimilarityMetric similarityMetric)
        {
            switch (similarityMetric)
            {
            case SimilarityMetric.Lexical:
                return((v1, v2) => 1.0 - v1.VarietyPairs[v2].LexicalSimilarityScore);

            case SimilarityMetric.Phonetic:
                return((v1, v2) => 1.0 - v1.VarietyPairs[v2].PhoneticSimilarityScore);

            default:
                // Fail fast instead of handing a null delegate to the clusterer.
                throw new ArgumentOutOfRangeException("similarityMetric", similarityMetric, "Unsupported similarity metric.");
            }
        }