Beispiel #1
0
        /// <summary>
        /// Depth first search (DFS) algorithm (recursive version).
        /// Sets mark[node.ID] to 'true' for the given node and then descends into
        /// each child whose entry in <paramref name="mark"/> is still 'false'.
        /// A child that is already marked is skipped together with its subtree.
        /// </summary>
        /// <param name="node">root node of the subtree</param>
        /// <param name="mark">Bool list indexed by node ID; it is modified in place</param>
        /// <exception cref="ArgumentNullException">node or mark is null</exception>
        public static void DepthFirstSearch(BinaryGuideTreeNode node, List<bool> mark)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            if (mark == null)
            {
                throw new ArgumentNullException("mark");
            }

            mark[node.ID] = true;

            // A child is descended into at most once (the recursive call marks it),
            // so a simple conditional is sufficient; no loop is required.
            BinaryGuideTreeNode left = node.LeftChildren;
            if (left != null && !mark[left.ID])
            {
                DepthFirstSearch(left, mark);
            }

            BinaryGuideTreeNode right = node.RightChildren;
            if (right != null && !mark[right.ID])
            {
                DepthFirstSearch(right, mark);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Compare two (sub)trees from root to leaves and find the top node
        /// at which the two trees differ.
        ///
        /// Normally nodeA is the root of newly generated tree A, and nodeB is the root
        /// of old tree B. This method returns the node in tree A, so that the subtree
        /// of this node will be re-aligned.
        ///
        /// The left subtrees are compared first; the right subtrees are compared only
        /// when no difference was found on the left. A child pair is compared only when
        /// both nodes have that child.
        /// </summary>
        /// <param name="nodeA">root of (sub)tree</param>
        /// <param name="nodeB">root of (sub)tree</param>
        /// <returns>the first differing node of tree A, or null when no difference is found</returns>
        /// <exception cref="ArgumentNullException">nodeA or nodeB is null</exception>
        public static BinaryGuideTreeNode FindSmallestTreeDifference(BinaryGuideTreeNode nodeA, BinaryGuideTreeNode nodeB)
        {
            if (nodeA == null)
            {
                throw new ArgumentNullException("nodeA");
            }

            if (nodeB == null)
            {
                throw new ArgumentNullException("nodeB");
            }

            //TODO the order of left/right child nodes do not have to be the same to be identical
            if (nodeA != nodeB)
            {
                return nodeA;
            }

            if (nodeA.LeftChildren != null && nodeB.LeftChildren != null)
            {
                BinaryGuideTreeNode leftDifference =
                    FindSmallestTreeDifference(nodeA.LeftChildren, nodeB.LeftChildren);

                // Only a real difference ends the search; a null result means the
                // left subtrees match and the right subtrees still have to be checked.
                if (leftDifference != null)
                {
                    return leftDifference;
                }
            }

            if (nodeA.RightChildren != null && nodeB.RightChildren != null)
            {
                return FindSmallestTreeDifference(nodeA.RightChildren, nodeB.RightChildren);
            }

            return null;
        }
Beispiel #3
0
 /// <summary>
 /// Initializes a tree that carries the supplied tree ID.
 /// The root is a newly created node, and both the node count
 /// and the leaf count start at one.
 /// </summary>
 /// <param name="id">zero-based tree ID</param>
 public BinaryGuideTree(int id)
 {
     // Counters first: the tree consists of the root only
     this._numberOfLeaves = 1;
     this._numberOfNodes = 1;

     this._root = new BinaryGuideTreeNode();
     this._id = id;
 }
Beispiel #4
0
        /// <summary>
        /// Merges the clusters at positions _nextA and _nextB into a new parent
        /// cluster, appends the parent to Nodes and records one edge per child.
        /// </summary>
        private void CreateCluster()
        {
            // The cluster ID is advanced before the children are looked up
            var parent = new BinaryGuideTreeNode(++_currentClusterID);
            var leftChild = Nodes[_nextA];
            var rightChild = Nodes[_nextB];

            // Wire parent and children together
            parent.LeftChildren = leftChild;
            parent.RightChildren = rightChild;
            leftChild.Parent = parent;
            rightChild.Parent = parent;

            // The new cluster takes over the sequence ID of its left child
            parent.SequenceID = leftChild.SequenceID;

            Nodes.Add(parent);

            // Both edges get the same length: the smallest distance found
            // (sequence distance is used as the edge length).
            var leftEdge = new BinaryGuideTreeEdge(leftChild.ID)
            {
                ParentNode = parent,
                ChildNode = leftChild,
                Length = _smallestDistance
            };
            var rightEdge = new BinaryGuideTreeEdge(rightChild.ID)
            {
                ParentNode = parent,
                ChildNode = rightChild,
                Length = _smallestDistance
            };

            _edges.Add(leftEdge);
            _edges.Add(rightEdge);
        }
Beispiel #5
0
        /// <summary>
        /// Builds a tree around an already existing node, which becomes the root.
        /// The tree ID is copied from the node's ID; node and leaf counts are set to one.
        /// </summary>
        /// <param name="r">a BinaryGuideTreeNode used as root</param>
        /// <exception cref="ArgumentNullException">r is null</exception>
        public BinaryGuideTree(BinaryGuideTreeNode r)
        {
            if (r == null)
            {
                throw new ArgumentNullException("r");
            }

            this._root = r;
            this._id = r.ID;

            // Only the root is counted here
            this._numberOfLeaves = 1;
            this._numberOfNodes = 1;
        }
Beispiel #6
0
 /// <summary>
 /// Builds a tree from the result of a hierarchical clustering run.
 ///
 /// The node and edge lists of the clustering object are adopted as they are;
 /// the last node in the node list is used as the root.
 /// </summary>
 /// <param name="hCluster">hierarchical clustering class object</param>
 /// <exception cref="ArgumentException">hCluster is null or its node list is empty</exception>
 public BinaryGuideTree(IHierarchicalClustering hCluster)
 {
     if (hCluster == null)
     {
         throw new ArgumentException("null Hierarchical clustering class");
     }

     var clusterNodes = hCluster.Nodes;
     int nodeCount = clusterNodes.Count;
     if (nodeCount == 0)
     {
         throw new ArgumentException("empty node list in Hierarchical clustering class");
     }

     _nodes = clusterNodes;
     _edges = hCluster.Edges;

     // Root is the final entry of the node list
     _root = clusterNodes[nodeCount - 1];

     _numberOfNodes = nodeCount;
     _numberOfLeaves = (_numberOfNodes + 2) / 2;
 }
Beispiel #7
0
        /// <summary>
        /// Merges the clusters at positions _nextA and _nextB into a new parent
        /// cluster, appends the parent to Nodes, and creates the two edges that
        /// connect the parent with its children.
        /// </summary>
        /// <param name="distanceMatrix">distance matrix (not read by this method)</param>
        private void CreateCluster(IDistanceMatrix distanceMatrix)
        {
            // The cluster ID is advanced before the children are looked up
            var parent = new BinaryGuideTreeNode(++_currentClusterID);
            var childA = Nodes[_nextA];
            var childB = Nodes[_nextB];

            // Wire parent and children together
            parent.LeftChildren = childA;
            parent.RightChildren = childB;
            childA.Parent = parent;
            childB.Parent = parent;

            // The child with the smaller list position supplies the sequence ID
            var lowerIndexChild = Nodes[Math.Min(_nextA, _nextB)];
            parent.SequenceID = lowerIndexChild.SequenceID;
            _indexToCluster[parent.SequenceID] = _currentClusterID;

            Nodes.Add(parent);

            // One edge per child, each pointing from the child up to the new parent
            var edgeToA = new BinaryGuideTreeEdge(childA.ID)
            {
                ParentNode = parent,
                ChildNode = childA
            };
            var edgeToB = new BinaryGuideTreeEdge(childB.ID)
            {
                ParentNode = parent,
                ChildNode = childB
            };

            childA.ParentEdge = edgeToA;
            childB.ParentEdge = edgeToB;

            // Both edges get the same length: the smallest distance found
            // (sequence distance is used as the edge length).
            edgeToA.Length = _smallestDistance;
            edgeToB.Length = _smallestDistance;

            _edges.Add(edgeToA);
            _edges.Add(edgeToB);
        }
Beispiel #8
0
        /// <summary>
        /// Treats the input node as a root and collects the *leaf* nodes of its subtree.
        /// The leaves are returned in ascending order of their position in the tree's node list.
        /// </summary>
        /// <param name="node">root node in a (sub)tree</param>
        /// <returns>the leaf nodes that were reached from <paramref name="node"/></returns>
        public List<BinaryGuideTreeNode> ExtractSubTreeLeafNodes(BinaryGuideTreeNode node)
        {
            // One flag per tree node, all starting out as 'false'
            List<bool> visited = new List<bool>(_numberOfNodes);
            visited.AddRange(new bool[_numberOfNodes]);

            // The depth first search flags every node of the subtree
            DepthFirstSearch(node, visited);

            List<BinaryGuideTreeNode> leaves = new List<BinaryGuideTreeNode>();
            int position = 0;
            foreach (bool reached in visited)
            {
                // The node list is only consulted for flagged positions
                if (reached && _nodes[position].IsLeaf)
                {
                    leaves.Add(_nodes[position]);
                }
                position++;
            }

            return leaves;
        }
Beispiel #9
0
 /// <summary>
 /// Initializes a tree that consists of a single, newly created root node.
 /// Node count and leaf count are both set to one.
 /// </summary>
 public BinaryGuideTree()
 {
     this._numberOfLeaves = 1;
     this._numberOfNodes = 1;
     this._root = new BinaryGuideTreeNode();
 }
Beispiel #10
0
 /// <summary>
 /// Creates an edge with the given ID that is not yet attached to
 /// a parent node or a child node.
 /// </summary>
 /// <param name="id">zero-based edge ID</param>
 public BinaryGuideTreeEdge(int id)
 {
     // Both end points start out unset
     this.ChildNode = null;
     this.ParentNode = null;

     this._id = id;
 }
Beispiel #11
0
        /// <summary>
        /// Builds a tree around an already existing node, which becomes the root.
        /// The tree ID is copied from the node's ID; node and leaf counts are set to one.
        /// </summary>
        /// <param name="r">a BinaryGuideTreeNode used as root</param>
        /// <exception cref="ArgumentNullException">r is null</exception>
        public BinaryGuideTree(BinaryGuideTreeNode r)
        {
            if (r == null)
            {
                throw new ArgumentNullException("r");
            }

            this._root = r;
            this._id = r.ID;

            // Only the root is counted here
            this._numberOfLeaves = 1;
            this._numberOfNodes = 1;
        }
Beispiel #12
0
        /// <summary>
        /// Main progressive alignment algorithm: aligns a set of sequences guided by
        /// a binary tree.
        ///
        /// Profiles are generated for the first sequences.Count nodes of the tree (the leaves),
        /// the remaining nodes that are flagged NeedReAlignment are aligned in list order, and
        /// finally every input sequence is rewritten by applying the EStrings found on the
        /// path from its leaf up to the root. The result is stored in _alignedSequences.
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        /// <exception cref="ArgumentNullException">sequences or tree is null</exception>
        /// <exception cref="ArgumentException">sequences is empty</exception>
        public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            SequenceWeighting sequenceWeighting = null;

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                sequenceWeighting = new SequenceWeighting(tree);
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }
            IAlphabet alphabet = sequences[0].Alphabet;

            // Every sequence must share the alphabet base of the first one.
            // NOTE: the exception is raised inside Parallel.For, so callers
            // observe it wrapped in an AggregateException.
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                // Generate weighted profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight           = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                if (tree.Nodes[i].NeedReAlignment)
                {
                    // pull out its children
                    _nodeA = tree.Nodes[i].LeftChildren;
                    _nodeB = tree.Nodes[i].RightChildren;

                    if (PAMSAMMultipleSequenceAligner.UseWeights)
                    {
                        _profileAligner.Weights    = new float[2];
                        _profileAligner.Weights[0] = _nodeA.Weight;
                        _profileAligner.Weights[1] = _nodeB.Weight;

                        // the internal node carries the summed weight of its children
                        tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
                    }

                    // align two profiles; the profile with fewer sequences is passed first
                    ProfileAlignment result = null;
                    if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    }
                    else
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes;
                        // the arguments were swapped, so AlignedB belongs to _nodeA here
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    }


                    // children node profiles can be deleted
                    _nodeA.ProfileAlignment.Clear();
                    _nodeB.ProfileAlignment.Clear();
                }
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List<ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                // keep the caught exception itself as the inner exception so the cause is not lost
                throw new Exception("Out of memory", ex);
            }

            // Pre-fill with placeholders so the parallel loop can assign by index
            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node;
                node = tree.Nodes[i];

                // walk from the leaf up to the root, applying each node's eString on the way
                while (!node.IsRoot)
                {
                    seq  = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
Beispiel #13
0
 /// <summary>
 /// Creates an edge that is not attached to any node; its ID is set to -1.
 /// </summary>
 public BinaryGuideTreeEdge()
 {
     // Both end points start out unset
     this.ChildNode = null;
     this.ParentNode = null;

     this._id = -1;
 }
Beispiel #14
0
 /// <summary>
 /// Creates an edge that is not attached to any node; its ID is set to -1.
 /// </summary>
 public BinaryGuideTreeEdge()
 {
     // Both end points start out unset
     this.ChildNode = null;
     this.ParentNode = null;

     this._id = -1;
 }
Beispiel #15
0
 /// <summary>
 /// Creates an edge with the given ID that is not yet attached to
 /// a parent node or a child node.
 /// </summary>
 /// <param name="id">zero-based edge ID</param>
 public BinaryGuideTreeEdge(int id)
 {
     // Both end points start out unset
     this.ChildNode = null;
     this.ParentNode = null;

     this._id = id;
 }
Beispiel #16
0
 /// <summary>
 /// Builds a tree from the result of a hierarchical clustering run.
 ///
 /// The node and edge lists of the clustering object are adopted as they are;
 /// the last node in the node list is used as the root.
 /// </summary>
 /// <param name="hCluster">hierarchical clustering class object</param>
 /// <exception cref="ArgumentException">hCluster is null or its node list is empty</exception>
 public BinaryGuideTree(IHierarchicalClustering hCluster)
 {
     if (hCluster == null)
     {
         throw new ArgumentException("null Hierarchical clustering class");
     }

     var clusterNodes = hCluster.Nodes;
     int nodeCount = clusterNodes.Count;
     if (nodeCount == 0)
     {
         throw new ArgumentException("empty node list in Hierarchical clustering class");
     }

     _nodes = clusterNodes;
     _edges = hCluster.Edges;

     // Root is the final entry of the node list
     _root = clusterNodes[nodeCount - 1];

     _numberOfNodes = nodeCount;
     _numberOfLeaves = (_numberOfNodes + 2) / 2;
 }
Beispiel #17
0
        /// <summary>
        /// Main progressive alignment algorithm: aligns a set of sequences guided by
        /// a binary tree.
        ///
        /// Profiles are generated for the first sequences.Count nodes of the tree (the leaves),
        /// the remaining nodes that are flagged NeedReAlignment are aligned in list order, and
        /// finally every input sequence is rewritten by applying the EStrings found on the
        /// path from its leaf up to the root. The result is stored in _alignedSequences.
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        /// <exception cref="ArgumentNullException">sequences or tree is null</exception>
        /// <exception cref="ArgumentException">sequences is empty</exception>
        public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (tree == null)
            {
                throw new ArgumentNullException("tree");
            }

            SequenceWeighting sequenceWeighting = null;
            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                sequenceWeighting = new SequenceWeighting(tree);
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }
            IAlphabet alphabet = sequences[0].Alphabet;

            // Every sequence must share the alphabet base of the first one.
            // NOTE: the exception is raised inside Parallel.For, so callers
            // observe it wrapped in an AggregateException.
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                // Generate weighted profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                if (tree.Nodes[i].NeedReAlignment)
                {
                    // pull out its children
                    _nodeA = tree.Nodes[i].LeftChildren;
                    _nodeB = tree.Nodes[i].RightChildren;

                    if (PAMSAMMultipleSequenceAligner.UseWeights)
                    {
                        _profileAligner.Weights = new float[2];
                        _profileAligner.Weights[0] = _nodeA.Weight;
                        _profileAligner.Weights[1] = _nodeB.Weight;

                        // the internal node carries the summed weight of its children
                        tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
                    }

                    // align two profiles; the profile with fewer sequences is passed first
                    ProfileAlignment result = null;
                    if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                                                    _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    }
                    else
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                                                    _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes;
                        // the arguments were swapped, so AlignedB belongs to _nodeA here
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    }


                    // children node profiles can be deleted
                    _nodeA.ProfileAlignment.Clear();
                    _nodeB.ProfileAlignment.Clear();
                }
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List<ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                // keep the caught exception itself as the inner exception so the cause is not lost
                throw new Exception("Out of memory", ex);
            }

            // Pre-fill with placeholders so the parallel loop can assign by index
            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.ParallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node;
                node = tree.Nodes[i];

                // walk from the leaf up to the root, applying each node's eString on the way
                while (!node.IsRoot)
                {
                    seq = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
Beispiel #18
0
        /// <summary>
        /// Treats the input node as a root and collects the *leaf* nodes of its subtree.
        /// The leaves are returned in ascending order of their position in the tree's node list.
        /// </summary>
        /// <param name="node">root node in a (sub)tree</param>
        /// <returns>the leaf nodes that were reached from <paramref name="node"/></returns>
        public List<BinaryGuideTreeNode> ExtractSubTreeLeafNodes(BinaryGuideTreeNode node)
        {
            // One flag per tree node, all starting out as 'false'
            List<bool> visited = new List<bool>(_numberOfNodes);
            visited.AddRange(new bool[_numberOfNodes]);

            // The depth first search flags every node of the subtree
            DepthFirstSearch(node, visited);

            List<BinaryGuideTreeNode> leaves = new List<BinaryGuideTreeNode>();
            int position = 0;
            foreach (bool reached in visited)
            {
                // The node list is only consulted for flagged positions
                if (reached && _nodes[position].IsLeaf)
                {
                    leaves.Add(_nodes[position]);
                }
                position++;
            }

            return leaves;
        }
Beispiel #19
0
 /// <summary>
 /// Initializes a tree that carries the supplied tree ID.
 /// The root is a newly created node, and both the node count
 /// and the leaf count start at one.
 /// </summary>
 /// <param name="id">zero-based tree ID</param>
 public BinaryGuideTree(int id)
 {
     // Counters first: the tree consists of the root only
     this._numberOfLeaves = 1;
     this._numberOfNodes = 1;

     this._root = new BinaryGuideTreeNode();
     this._id = id;
 }
Beispiel #20
0
        /// <summary>
        /// Depth first search (DFS) algorithm (recursive version).
        /// Sets mark[node.ID] to 'true' for the given node and then descends into
        /// each child whose entry in <paramref name="mark"/> is still 'false'.
        /// A child that is already marked is skipped together with its subtree.
        /// </summary>
        /// <param name="node">root node of the subtree</param>
        /// <param name="mark">Bool list indexed by node ID; it is modified in place</param>
        /// <exception cref="ArgumentNullException">node or mark is null</exception>
        public static void DepthFirstSearch(BinaryGuideTreeNode node, List<bool> mark)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            if (mark == null)
            {
                throw new ArgumentNullException("mark");
            }

            mark[node.ID] = true;

            // A child is descended into at most once (the recursive call marks it),
            // so a simple conditional is sufficient; no loop is required.
            BinaryGuideTreeNode left = node.LeftChildren;
            if (left != null && !mark[left.ID])
            {
                DepthFirstSearch(left, mark);
            }

            BinaryGuideTreeNode right = node.RightChildren;
            if (right != null && !mark[right.ID])
            {
                DepthFirstSearch(right, mark);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Compare two (sub)trees from root to leaves and find the top node
        /// at which the two trees differ.
        ///
        /// Normally nodeA is the root of newly generated tree A, and nodeB is the root
        /// of old tree B. This method returns the node in tree A, so that the subtree
        /// of this node will be re-aligned.
        ///
        /// The left subtrees are compared first; the right subtrees are compared only
        /// when no difference was found on the left. A child pair is compared only when
        /// both nodes have that child.
        /// </summary>
        /// <param name="nodeA">root of (sub)tree</param>
        /// <param name="nodeB">root of (sub)tree</param>
        /// <returns>the first differing node of tree A, or null when no difference is found</returns>
        /// <exception cref="ArgumentNullException">nodeA or nodeB is null</exception>
        public static BinaryGuideTreeNode FindSmallestTreeDifference(BinaryGuideTreeNode nodeA, BinaryGuideTreeNode nodeB)
        {
            if (nodeA == null)
            {
                throw new ArgumentNullException("nodeA");
            }

            if (nodeB == null)
            {
                throw new ArgumentNullException("nodeB");
            }

            //TODO the order of left/right child nodes do not have to be the same to be identical
            if (nodeA != nodeB)
            {
                return nodeA;
            }

            if (nodeA.LeftChildren != null && nodeB.LeftChildren != null)
            {
                BinaryGuideTreeNode leftDifference =
                    FindSmallestTreeDifference(nodeA.LeftChildren, nodeB.LeftChildren);

                // Only a real difference ends the search; a null result means the
                // left subtrees match and the right subtrees still have to be checked.
                if (leftDifference != null)
                {
                    return leftDifference;
                }
            }

            if (nodeA.RightChildren != null && nodeB.RightChildren != null)
            {
                return FindSmallestTreeDifference(nodeA.RightChildren, nodeB.RightChildren);
            }

            return null;
        }
Beispiel #22
0
 /// <summary>
 /// Initializes a tree that consists of a single, newly created root node.
 /// Node count and leaf count are both set to one.
 /// </summary>
 public BinaryGuideTree()
 {
     this._numberOfLeaves = 1;
     this._numberOfNodes = 1;
     this._root = new BinaryGuideTreeNode();
 }