/// <summary>
/// Create a tree using an existing node as root.
/// The tree takes over the ID of the given node. The node and leaf counters
/// are both initialised to 1, regardless of any descendants the node may have.
/// </summary>
/// <param name="r">a BinaryGuideTreeNode that becomes the root of the tree</param>
/// <exception cref="ArgumentNullException">thrown when <paramref name="r"/> is null</exception>
public BinaryGuideTree(BinaryGuideTreeNode r)
{
    // Fail with a descriptive exception rather than a NullReferenceException on r.ID.
    if (r == null)
    {
        throw new ArgumentNullException("r");
    }

    _id = r.ID;
    _root = r;
    _numberOfNodes = 1;
    _numberOfLeaves = 1;
}
/// <summary>
/// Depth first search (DFS) algorithm (recursive version).
/// Visits the subtree rooted at the given node and sets the entry of every
/// visited node (indexed by node ID) to 'true'.
/// </summary>
/// <param name="node">root node of the subtree</param>
/// <param name="mark">Bool list of all nodes (in a tree), indexed by node ID</param>
/// <exception cref="ArgumentNullException">thrown when either argument is null</exception>
public static void DepthFirstSearch(BinaryGuideTreeNode node, List<bool> mark)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }
    if (mark == null)
    {
        throw new ArgumentNullException("mark");
    }

    mark[node.ID] = true;

    // A child that is already marked is not descended into again.
    // (The previous 'while ... else break' loops could never iterate twice,
    // because the recursive call always marks the child first.)
    BinaryGuideTreeNode left = node.LeftChildren;
    if (left != null && !mark[left.ID])
    {
        DepthFirstSearch(left, mark);
    }

    BinaryGuideTreeNode right = node.RightChildren;
    if (right != null && !mark[right.ID])
    {
        DepthFirstSearch(right, mark);
    }
}
/// <summary>
/// Build a single-node tree that carries the given tree ID.
/// The root is a freshly created node; node and leaf counters are both 1.
/// </summary>
/// <param name="id">zero-based tree ID</param>
public BinaryGuideTree(int id)
{
    // A new tree consists only of its root, which is also its only leaf.
    _root = new BinaryGuideTreeNode();
    _numberOfLeaves = 1;
    _numberOfNodes = 1;

    _id = id;
}
/// <summary>
/// Construct a tree by hierarchical clustering method.
///
/// The node list is already generated in the hierarchical clustering method
/// and the root will be the last node in the list.
/// </summary>
/// <param name="hCluster">hierarchical clustering class object</param>
/// <exception cref="ArgumentNullException">thrown when <paramref name="hCluster"/> is null</exception>
/// <exception cref="ArgumentException">thrown when the clustering result has no nodes</exception>
public BinaryGuideTree(IHierarchicalClustering hCluster)
{
    // ArgumentNullException derives from ArgumentException, so callers that
    // catch ArgumentException keep working.
    if (hCluster == null)
    {
        throw new ArgumentNullException("hCluster", "null Hierarchical clustering class");
    }

    if (hCluster.Nodes == null || hCluster.Nodes.Count == 0)
    {
        throw new ArgumentException("empty node list in Hierarchical clustering class");
    }

    _nodes = hCluster.Nodes;
    _edges = hCluster.Edges;

    // The clustering appends each merged cluster, so the last node is the root.
    _root = hCluster.Nodes[hCluster.Nodes.Count - 1];
    _numberOfNodes = hCluster.Nodes.Count;

    // A full binary tree with L leaves has 2L - 1 nodes, hence L = (N + 1) / 2.
    // This equals the former (N + 2) / 2 for every odd N.
    _numberOfLeaves = (_numberOfNodes + 1) / 2;
}
/// <summary>
/// Merge the clusters at indices _nextA and _nextB under a newly created parent
/// node, append that node to the node list and register the two connecting edges.
/// </summary>
/// <param name="distanceMatrix">distance matrix (not read by this method)</param>
private void CreateCluster(IDistanceMatrix distanceMatrix)
{
    BinaryGuideTreeNode parent = new BinaryGuideTreeNode(++_currentClusterID);

    BinaryGuideTreeNode childA = Nodes[_nextA];
    BinaryGuideTreeNode childB = Nodes[_nextB];

    // Hook both clusters underneath the new parent node
    parent.LeftChildren = childA;
    parent.RightChildren = childB;
    childA.Parent = parent;
    childB.Parent = parent;

    // The parent inherits the sequence ID of the child with the lower index
    BinaryGuideTreeNode leftmost = Nodes[Math.Min(_nextA, _nextB)];
    parent.SequenceID = leftmost.SequenceID;
    _indexToCluster[parent.SequenceID] = _currentClusterID;

    Nodes.Add(parent);

    // Each edge carries the ID of its child node; both edges get the
    // smallest distance as their length.
    BinaryGuideTreeEdge toChildA = new BinaryGuideTreeEdge(childA.ID)
    {
        ParentNode = parent,
        ChildNode = childA
    };
    BinaryGuideTreeEdge toChildB = new BinaryGuideTreeEdge(childB.ID)
    {
        ParentNode = parent,
        ChildNode = childB
    };

    childA.ParentEdge = toChildA;
    childB.ParentEdge = toChildB;

    toChildA.Length = _smallestDistance;
    toChildB.Length = _smallestDistance;

    _edges.Add(toChildA);
    _edges.Add(toChildB);
}
/// <summary>
/// Compare two (sub)trees from root to leaves and find the top-most node of
/// tree A below which the two trees differ.
///
/// Normally nodeA is the root of newly generated tree A, and nodeB is the root
/// of old tree B. This method returns the top node in tree A, so that the subtree
/// of this node will be re-aligned.
/// </summary>
/// <param name="nodeA">root of (sub)tree A</param>
/// <param name="nodeB">root of (sub)tree B</param>
/// <returns>
/// nodeA when the two nodes have different IDs; the current node of tree A when
/// both its left and right subtrees contain a difference; the node found in the
/// single differing subtree otherwise; null when no difference is found.
/// </returns>
/// <exception cref="ArgumentNullException">thrown when either argument is null</exception>
public static BinaryGuideTreeNode FindSmallestTreeDifference(BinaryGuideTreeNode nodeA, BinaryGuideTreeNode nodeB)
{
    //TODO the order of left/right child nodes do not have to be the same to be identical
    if (nodeA == null)
    {
        throw new ArgumentNullException("nodeA");
    }
    if (nodeB == null)
    {
        throw new ArgumentNullException("nodeB");
    }

    // The nodes belong to two different trees, so they are matched by ID
    // rather than by object reference.
    if (nodeA.ID != nodeB.ID)
    {
        return nodeA;
    }

    // Examine BOTH subtrees. A child present in only one of the trees is not
    // compared, as before.
    BinaryGuideTreeNode leftDifference = null;
    if (nodeA.LeftChildren != null && nodeB.LeftChildren != null)
    {
        leftDifference = FindSmallestTreeDifference(nodeA.LeftChildren, nodeB.LeftChildren);
    }

    BinaryGuideTreeNode rightDifference = null;
    if (nodeA.RightChildren != null && nodeB.RightChildren != null)
    {
        rightDifference = FindSmallestTreeDifference(nodeA.RightChildren, nodeB.RightChildren);
    }

    // Differences on both sides: this node roots the smallest subtree covering them all.
    if (leftDifference != null && rightDifference != null)
    {
        return nodeA;
    }

    return leftDifference ?? rightDifference;
}
/// <summary>
/// Collect the *leaf* nodes of the subtree rooted at the given node.
/// Leaves are returned in ascending order of their position in the tree's node list.
/// </summary>
/// <param name="node">root node in a (sub)tree</param>
/// <returns>the leaf nodes reachable from <paramref name="node"/></returns>
public List<BinaryGuideTreeNode> ExtractSubTreeLeafNodes(BinaryGuideTreeNode node)
{
    // One flag per node of the tree, all initially false
    List<bool> visited = new List<bool>(_numberOfNodes);
    visited.AddRange(new bool[_numberOfNodes]);

    // Flag every node that belongs to the subtree
    DepthFirstSearch(node, visited);

    List<BinaryGuideTreeNode> leaves = new List<BinaryGuideTreeNode>();
    int index = 0;
    foreach (bool reached in visited)
    {
        if (reached)
        {
            BinaryGuideTreeNode candidate = _nodes[index];
            if (candidate.IsLeaf)
            {
                leaves.Add(candidate);
            }
        }
        ++index;
    }

    return leaves;
}
/// <summary>
/// Main progressive alignment algorithm: aligns a set of sequences guided by
/// a binary tree.
///
/// The first sequences.Count nodes of the tree are treated as the leaves, one per
/// input sequence; the remaining nodes are processed in list order, each one
/// aligning the profiles of its two children. The aligned sequences are stored
/// in _alignedSequences.
/// </summary>
/// <param name="sequences">input sequences</param>
/// <param name="tree">a binary guide tree</param>
/// <exception cref="ArgumentNullException">thrown when an argument is null</exception>
/// <exception cref="ArgumentException">thrown when the set of sequences is empty</exception>
public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
{
    // Validate the arguments before any work is done.
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }
    if (tree == null)
    {
        throw new ArgumentNullException("tree");
    }
    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty set of sequences");
    }

    SequenceWeighting sequenceWeighting = null;
    if (PAMSAMMultipleSequenceAligner.UseWeights)
    {
        sequenceWeighting = new SequenceWeighting(tree);
    }

    // Every sequence must use the alphabet of the first one.
    // Note: an exception thrown inside Parallel.For reaches the caller wrapped
    // in an AggregateException.
    IAlphabet alphabet = sequences[0].Alphabet;
    Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
    {
        if (sequences[i].Alphabet != alphabet)
        {
            throw new ArgumentException("Inconsistent sequence alphabet");
        }
    });

    if (PAMSAMMultipleSequenceAligner.UseWeights)
    {
        // Generate profile for leaf nodes
        Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
        {
            tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
            tree.Nodes[i].Weight = sequenceWeighting.Weights[i];
        });
    }
    else
    {
        // Generate profile for leaf nodes
        Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
        {
            tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
        });
    }

    // Iterate internal nodes;
    // as defined in the tree, the last node is the root
    for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
    {
        if (tree.Nodes[i].NeedReAlignment)
        {
            // pull out its children
            _nodeA = tree.Nodes[i].LeftChildren;
            _nodeB = tree.Nodes[i].RightChildren;

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                _profileAligner.Weights = new float[2];
                _profileAligner.Weights[0] = _nodeA.Weight;
                _profileAligner.Weights[1] = _nodeB.Weight;

                // the weight of an internal node is the sum of its children's weights
                tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
            }

            // align two profiles; the profile with fewer sequences is passed first
            ProfileAlignment result;
            if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
            {
                result = (ProfileAlignment)_profileAligner.Align(
                    _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);

                // assign aligned profiles to the current internal node
                tree.Nodes[i].ProfileAlignment = result;

                // generate eString for the children nodes
                _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
            }
            else
            {
                result = (ProfileAlignment)_profileAligner.Align(
                    _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);

                // assign aligned profiles to the current internal node
                tree.Nodes[i].ProfileAlignment = result;

                // the arguments were swapped, so AlignedB belongs to node A and vice versa
                _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
            }

            // children node profiles can be deleted
            _nodeA.ProfileAlignment.Clear();
            _nodeB.ProfileAlignment.Clear();
        }
    }

    // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
    try
    {
        _alignedSequences = new List<ISequence>(sequences.Count);
    }
    catch (OutOfMemoryException ex)
    {
        // Keep the original exception as the inner exception so its details are not lost.
        throw new Exception("Out of memory", ex);
    }

    // Pre-fill the list so that each parallel iteration can write to its own slot.
    for (int i = 0; i < sequences.Count; ++i)
    {
        _alignedSequences.Add(null);
    }

    Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
    {
        ISequence seq = sequences[i];
        BinaryGuideTreeNode node = tree.Nodes[i];

        // Walk from the leaf up to the root, applying the eString of each node on the way.
        while (!node.IsRoot)
        {
            seq = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
            node = node.Parent;
        }

        _alignedSequences[i] = seq;
    });
}
/// <summary>
/// Build a tree that consists of a single, newly created root node.
/// Node and leaf counters are both set to 1.
/// </summary>
public BinaryGuideTree()
{
    // The root is the only node and therefore also the only leaf.
    _numberOfLeaves = 1;
    _numberOfNodes = 1;
    _root = new BinaryGuideTreeNode();
}
/// <summary>
/// Build an edge that carries the given ID and is not yet attached to any node.
/// </summary>
/// <param name="id">zero-based edge ID</param>
public BinaryGuideTreeEdge(int id)
{
    // Both end points stay unset until the edge is linked into a tree.
    ChildNode = null;
    ParentNode = null;

    _id = id;
}
/// <summary>
/// Build an unattached edge whose ID is set to -1.
/// </summary>
public BinaryGuideTreeEdge()
{
    // Both end points stay unset until the edge is linked into a tree.
    ChildNode = null;
    ParentNode = null;

    _id = -1;
}