/// <summary>
/// Exercises <c>ProgressiveAligner</c> with the Needleman-Wunsch profile aligner on three inputs:
/// a three-sequence DNA set whose aligned output is asserted, a ten-sequence DNA set, and a
/// protein FASTA file. The last two runs only print their results to the console.
/// </summary>
public void TestProgressiveAligner()
{
    // ---- Data set 1: three DNA sequences with a known expected alignment ----
    MsaUtils.SetProfileItemSets(MoleculeType.DNA);

    SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    int gapOpenPenalty = -8;
    int gapExtendPenalty = -1;
    int kmerLength = 4;

    PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

    ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
    ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
    ISequence seqC = new Sequence(Alphabets.DNA, "GGGACAAAATCAG");

    List<ISequence> sequences = new List<ISequence>();
    sequences.Add(seqA);
    sequences.Add(seqB);
    sequences.Add(seqC);

    KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
    kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

    IHierarchicalClustering hierarchicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

    BinaryGuideTree tree = new BinaryGuideTree(hierarchicalClustering);

    IProgressiveAligner progressiveAligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    progressiveAligner.Align(sequences, tree);

    ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT");
    ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");
    ISequence expectedSeqC = new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---");

    Assert.AreEqual(expectedSeqA.ToString(), progressiveAligner.AlignedSequences[0].ToString());
    Assert.AreEqual(expectedSeqB.ToString(), progressiveAligner.AlignedSequences[1].ToString());
    Assert.AreEqual(expectedSeqC.ToString(), progressiveAligner.AlignedSequences[2].ToString());

    // ---- Data set 2: ten DNA sequences, results are printed rather than asserted ----
    sequences = new List<ISequence>();
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAAATCG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
    sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));

    // The generator created for data set 1 is reused here with the new sequence list.
    kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

    hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

    tree = new BinaryGuideTree(hierarchicalClustering);

    for (int i = 0; i < tree.NumberOfNodes; ++i)
    {
        Console.WriteLine("Node {0} ID: {1}", i, tree.Nodes[i].ID);
    }

    for (int i = 0; i < tree.NumberOfEdges; ++i)
    {
        Console.WriteLine("Edge {0} ID: {1}, length: {2}", i, tree.Edges[i].ID, tree.Edges[i].Length);
    }

    SequenceWeighting sw = new SequenceWeighting(tree);

    for (int i = 0; i < sw.Weights.Length; ++i)
    {
        Console.WriteLine("weights {0} is {1}", i, sw.Weights[i]);
    }

    progressiveAligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    progressiveAligner.Align(sequences, tree);

    for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
    {
        Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
    }

    // ---- Data set 3: protein sequences parsed from a FASTA file ----
    MsaUtils.SetProfileItemSets(MoleculeType.Protein);

    ISequenceParser parser = new FastaParser();
    string filepath = @"testdata\FASTA\Protein\BB11001.tfa";
    IList<ISequence> orgSequences = parser.Parse(filepath);

    sequences = MsaUtils.UnAlign(orgSequences);

    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    kmerLength = 4;

    gapOpenPenalty = -13;
    gapExtendPenalty = -5;

    // The k-mer generator must use the same molecule type as the profile item sets and the
    // similarity matrix chosen above; this section works on protein data, not DNA.
    kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.Protein);
    kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

    hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

    tree = new BinaryGuideTree(hierarchicalClustering);

    // Only internal nodes (indices from NumberOfLeaves upward) have children to print.
    for (int i = tree.NumberOfLeaves; i < tree.Nodes.Count; ++i)
    {
        Console.WriteLine(
            "Node {0}: leftchildren-{1}, rightChildren-{2}",
            i,
            tree.Nodes[i].LeftChildren.ID,
            tree.Nodes[i].RightChildren.ID);
    }

    progressiveAligner = new ProgressiveAligner(
        ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
    progressiveAligner.Align(sequences, tree);

    for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
    {
        Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
    }
}
/// <summary>
/// Builds hierarchical clusterings from a hand-filled 4x4 distance matrix, from k-mer distances
/// over ten short DNA sequences, and from a FASTA file, checking node counts, node IDs and the
/// initial <c>NeedReAlignment</c> flags. Child IDs and distances are only printed.
/// </summary>
/// <remarks>
/// Despite the "Serial" in its name, every clustering in this test is created with
/// <c>HierarchicalClusteringParallel</c>.
/// </remarks>
public void TestHierarchicalClusteringSerial()
{
    // ---- Hand-built symmetric matrix where distance(row, col) = row + col ----
    int size = 4;
    IDistanceMatrix pairDistances = new SymmetricDistanceMatrix(size);
    for (int row = 0; row < pairDistances.Dimension - 1; ++row)
    {
        for (int col = row + 1; col < pairDistances.Dimension; ++col)
        {
            int value = row + col;
            pairDistances[row, col] = value;
            pairDistances[col, row] = value;
        }
    }

    PAMSAMMultipleSequenceAligner.ParallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

    IHierarchicalClustering clustering = new HierarchicalClusteringParallel(pairDistances);

    Assert.AreEqual(7, clustering.Nodes.Count);

    // Node IDs are expected to match their position in the node list.
    int expectedNodeCount = size * 2 - 1;
    for (int index = 0; index < expectedNodeCount; ++index)
    {
        Assert.AreEqual(index, clustering.Nodes[index].ID);
    }

    // Nodes from index `size` onward are dereferenced for children; print both child IDs.
    for (int index = size; index < clustering.Nodes.Count; ++index)
    {
        var internalNode = clustering.Nodes[index];
        Console.WriteLine(internalNode.LeftChildren.ID);
        Console.WriteLine(internalNode.RightChildren.ID);
    }

    // ---- Test on sequences ----
    ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
    ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
    ISequence seqC = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

    var dnaSequences = new List<ISequence>
    {
        seqA,
        seqB,
        seqC,
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
    };

    int kmerSize = 4;
    var kmerGenerator = new KmerDistanceMatrixGenerator(dnaSequences, kmerSize, Alphabets.AmbiguousDNA);
    kmerGenerator.GenerateDistanceMatrix(dnaSequences);

    // Dump the upper triangle of the k-mer distance matrix.
    for (int row = 0; row < kmerGenerator.DistanceMatrix.Dimension - 1; ++row)
    {
        for (int col = row + 1; col < kmerGenerator.DistanceMatrix.Dimension; ++col)
        {
            Console.WriteLine("{0}-{1}: {2}", row, col, kmerGenerator.DistanceMatrix[row, col]);
        }
    }

    clustering = new HierarchicalClusteringParallel(kmerGenerator.DistanceMatrix);

    // Every freshly clustered node is expected to be flagged for (re)alignment.
    for (int index = 0; index < clustering.Nodes.Count; ++index)
    {
        Assert.AreEqual(true, clustering.Nodes[index].NeedReAlignment);
    }

    // The guide tree built from the clustering is expected to carry the same flags.
    var guideTree = new BinaryGuideTree(clustering);
    for (int index = 0; index < guideTree.Nodes.Count; ++index)
    {
        Assert.AreEqual(true, guideTree.Nodes[index].NeedReAlignment);
    }

    // Assertions on the exact child IDs of nodes 4-6 were disabled in the original test
    // and remain disabled here.

    // ---- Test on larger dataset ----
    string fastaPath = @"TestUtils\Fasta\RV11_BBS_all.afa".TestDir();
    FastAParser fastaParser = new FastAParser();
    IList<ISequence> parsed = fastaParser.Parse(fastaPath).ToList();

    dnaSequences = MsaUtils.UnAlign(parsed);

    kmerGenerator = new KmerDistanceMatrixGenerator(dnaSequences, kmerSize, Alphabets.AmbiguousDNA);
    kmerGenerator.GenerateDistanceMatrix(dnaSequences);

    clustering = new HierarchicalClusteringParallel(kmerGenerator.DistanceMatrix);

    // Print the children of every node past the first `dnaSequences.Count` entries.
    for (int index = dnaSequences.Count; index < clustering.Nodes.Count; ++index)
    {
        Console.WriteLine(
            "Node {0}: leftchildren-{1}, rightChildren-{2}",
            index,
            clustering.Nodes[index].LeftChildren.ID,
            clustering.Nodes[index].RightChildren.ID);
    }
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <remarks>
/// Stage 1 builds a guide tree from k-mer distances and runs a progressive alignment.
/// Unless <c>FasterVersion</c> is set, Stage 2 (when <c>UseStageB</c> is true) rebuilds the tree
/// from Kimura distances of the current alignment and aligns again, and Stage 3 refines the
/// alignment by cutting tree edges and re-aligning the two resulting profiles.
/// Results are written to <c>AlignedSequences</c>/<c>AlignmentScore</c> and the per-stage
/// <c>AlignedSequencesA/B/C</c> and <c>AlignmentScoreA/B/C</c> members; nothing is returned.
/// </remarks>
/// <param name="sequences">Input sequences</param>
/// <exception cref="ArgumentException">Thrown when <c>ProfileAlignerName</c> is not a supported aligner.</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready: serial implementation when degreeOfParallelism is 1, parallel otherwise.
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }

            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }

            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarcicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Faster version: stages 2 and 3 are skipped and simply mirror the stage 1 result.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarcicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (UseStageB)
        {
            // STAGE 2
            ReportLog("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment.
            // (The original wrapped this in a loop that always exited after one pass.)
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

            // Hierarchical clustering
            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

            if (currentScore > this.AlignmentScoreB)
            {
                this.AlignmentScoreB = currentScore;
                this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (this.AlignmentScoreB > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreB;
                this.AlignedSequences = this.AlignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        ReportLog("Stage 3");

        // refinement: at most one pass, and none at all when there are only two sequences
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Work on a copy of the best alignment so far.
        this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
        foreach (ISequence t in this.AlignedSequences)
        {
            this.AlignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                {
                    ID = t.ID,
                    // Do not shallow copy dictionary
                    //Metadata = t.Metadata
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            ReportLog("Refinement iter " + refinementTime);
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cutting one edge splits the leaves into two groups of sequence indices.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions = new List<int>[2];

                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                List<int>[] eStrings = new List<int>[2];

                // The profile with fewer sequences is passed first; eStrings stay indexed by original set.
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    // Resolve the per-set data once so the lambda does not capture the loop variable.
                    List<int> indelPositions = allIndelPositions[set];
                    List<int> eString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                    {
                        var current = this.AlignedSequencesC[i];

                        // Copy the sequence, skipping the positions listed in indelPositions.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < indelPositions.Count && j == indelPositions[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        var realigned = profileAligner.GenerateSequenceFromEString(
                            eString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                        // Carry the ID over from the sequence being replaced. The previous code
                        // assigned the new sequence's ID to itself, which dropped the original ID.
                        realigned.ID = current.ID;

                        // Do not shallow copy dictionary
                        //(_alignedSequencesC[i] as Sequence).Metadata = _alignedSequencesC[i].Metadata;
                        this.AlignedSequencesC[i] = realigned;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > this.AlignmentScoreC)
                {
                    this.AlignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                    break;
                }
            }

            // No edge produced a better score: stop refining.
            if (!needRefinement)
            {
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (this.AlignmentScoreC > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreC;
            this.AlignedSequences = this.AlignedSequencesC;
        }

        ReportLog("Stop Stage 3");
    }
}
/// <summary>
/// Get Hierarchical Clustering using kmerdistancematrix\kimura distance matrix,
/// with the default distance method name.
/// </summary>
/// <param name="distanceMatrix">Distance matrix to cluster.</param>
/// <returns>Hierarchical clustering</returns>
private static IHierarchicalClustering GetHierarchicalClustering(IDistanceMatrix distanceMatrix)
{
    // This overload takes no method name, so the single-argument constructor is used.
    return new HierarchicalClusteringParallel(distanceMatrix);
}
/// <summary>
/// Creates a hierarchical clustering from a k-mer or Kimura distance matrix using the
/// given distance update method.
/// </summary>
/// <param name="distanceMatrix">Distance matrix to cluster.</param>
/// <param name="hierarchicalClusteringMethodName">Distance update method passed to the clustering.</param>
/// <returns>Hierarchical clustering</returns>
private static IHierarchicalClustering GetHierarchicalClustering(IDistanceMatrix distanceMatrix, UpdateDistanceMethodsTypes hierarchicalClusteringMethodName)
{
    return new HierarchicalClusteringParallel(distanceMatrix, hierarchicalClusteringMethodName);
}
/// <summary>
/// Exercises <c>BinaryGuideTreeNode</c>, <c>BinaryGuideTreeEdge</c> and <c>BinaryGuideTree</c>:
/// manual node/edge wiring, tree construction from a clustering, sub-tree extraction,
/// tree comparison, cutting the tree by edge, and <c>CutTree</c>.
/// Several results are only written to the console rather than asserted.
/// </summary>
public void TestBinaryGuideTree()
{
    // ---- Hand-wired five-node tree: node 4 is the root, node 3 is internal, nodes 0-2 are leaves ----
    int nodeCount = 5;
    var nodes = new List<BinaryGuideTreeNode>(nodeCount);
    for (int id = 0; id < nodeCount; ++id)
    {
        nodes.Add(new BinaryGuideTreeNode(id));
    }

    nodes[3].LeftChildren = nodes[0];
    nodes[3].RightChildren = nodes[1];
    nodes[4].LeftChildren = nodes[3];
    nodes[4].RightChildren = nodes[2];

    nodes[0].Parent = nodes[3];
    nodes[1].Parent = nodes[3];
    nodes[2].Parent = nodes[4];
    nodes[3].Parent = nodes[4];

    // Leaves: not root, is leaf.
    for (int leaf = 0; leaf < 3; ++leaf)
    {
        Assert.IsFalse(nodes[leaf].IsRoot);
        Assert.IsTrue(nodes[leaf].IsLeaf);
    }

    // Internal node: neither root nor leaf.
    Assert.IsFalse(nodes[3].IsRoot);
    Assert.IsFalse(nodes[3].IsLeaf);

    // Root: is root, not leaf.
    Assert.IsTrue(nodes[4].IsRoot);
    Assert.IsFalse(nodes[4].IsLeaf);

    Assert.AreEqual(nodes[3], nodes[0].Parent);

    // ---- Hand-wired edges (constructed and connected, not asserted on) ----
    int edgeCount = 4;
    var edges = new List<BinaryGuideTreeEdge>(edgeCount);
    for (int id = 0; id < edgeCount; ++id)
    {
        edges.Add(new BinaryGuideTreeEdge(id));
    }

    int[] parentOfEdge = { 3, 3, 4, 4 };
    int[] childOfEdge = { 0, 1, 2, 3 };
    for (int e = 0; e < edgeCount; ++e)
    {
        edges[e].ParentNode = nodes[parentOfEdge[e]];
        edges[e].ChildNode = nodes[childOfEdge[e]];
    }

    // ---- Tree built from a 4x4 matrix where distance(row, col) = row + col ----
    int size = 4;
    IDistanceMatrix pairDistances = new SymmetricDistanceMatrix(size);
    for (int row = 0; row < pairDistances.Dimension - 1; ++row)
    {
        for (int col = row + 1; col < pairDistances.Dimension; ++col)
        {
            int value = row + col;
            pairDistances[row, col] = value;
            pairDistances[col, row] = value;
        }
    }

    PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

    IHierarchicalClustering clustering = new HierarchicalClusteringParallel(pairDistances);

    BinaryGuideTree treeA = new BinaryGuideTree(clustering);

    Assert.AreEqual(7, treeA.NumberOfNodes);
    Assert.AreEqual(6, treeA.NumberOfEdges);
    Assert.AreEqual(4, treeA.NumberOfLeaves);

    Assert.IsTrue(treeA.Nodes[treeA.Nodes.Count - 1].IsRoot);

    for (int index = 0; index < treeA.Nodes.Count; ++index)
    {
        Console.WriteLine(treeA.Nodes[index].ID);
    }

    // Test ExtractSubTreeNodes
    // (printed only; the disabled assertions in the original expected 3, 7 and 1)
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[4]).Count : {0}",
        treeA.ExtractSubTreeNodes(treeA.Nodes[4]).Count);
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Count : {0}",
        treeA.ExtractSubTreeNodes(treeA.Nodes[treeA.Nodes.Count - 1]).Count);
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[0]).Count : {0}",
        treeA.ExtractSubTreeNodes(treeA.Nodes[0]).Count);

    // Test ExtractSubTreeLeafNodes
    // (printed only; the disabled assertions in the original expected 2, 4 and 1)
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[4]).Count : {0}",
        treeA.ExtractSubTreeLeafNodes(treeA.Nodes[4]).Count);
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Coun : {0}",
        treeA.ExtractSubTreeLeafNodes(treeA.Nodes[treeA.Nodes.Count - 1]).Count);
    Console.WriteLine(
        "binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[0]).Count : {0}",
        treeA.ExtractSubTreeLeafNodes(treeA.Nodes[0]).Count);

    // Test FindSmallestTreeDifference
    BinaryGuideTree treeB = new BinaryGuideTree(clustering);

    // Comparing the last node of each identically built tree: no difference expected.
    BinaryGuideTreeNode difference = BinaryGuideTree.FindSmallestTreeDifference(
        treeA.Nodes[treeA.Nodes.Count - 1],
        treeB.Nodes[treeB.Nodes.Count - 1]);
    Assert.IsNull(difference);

    // Comparing the last node of A against the first node of B: a difference is expected.
    difference = BinaryGuideTree.FindSmallestTreeDifference(
        treeA.Nodes[treeA.Nodes.Count - 1],
        treeB.Nodes[0]);
    Assert.IsNotNull(difference);

    // Test CompareTwoTrees
    for (int index = 0; index < treeA.Nodes.Count; ++index)
    {
        Console.Write(treeA.Nodes[index].ID);
    }

    for (int index = 0; index < treeB.Nodes.Count; ++index)
    {
        Console.Write(treeB.Nodes[index].ID);
    }

    BinaryGuideTree.CompareTwoTrees(treeA, treeB);

    // Print each node's flag, then clear it.
    for (int index = 0; index < treeA.Nodes.Count; ++index)
    {
        var current = treeA.Nodes[index];
        Console.Write(current.ID);
        Console.Write(current.NeedReAlignment);
        current.NeedReAlignment = false;
    }

    for (int index = 0; index < treeB.Nodes.Count; ++index)
    {
        var current = treeB.Nodes[index];
        Console.Write(current.ID);
        Console.Write(current.NeedReAlignment);
        current.NeedReAlignment = false;
    }

    // NOTE(review): the loops above have just cleared every flag, so the assertions below
    // look trivially true regardless of what CompareTwoTrees did - confirm whether the
    // reset is intentional.
    for (int index = 4; index <= 6; ++index)
    {
        Assert.IsFalse(treeA.Nodes[index].NeedReAlignment);
    }

    for (int index = treeA.NumberOfLeaves; index < treeA.NumberOfNodes; ++index)
    {
        Assert.IsFalse(treeA.Nodes[index].NeedReAlignment);
    }

    // Changing only the root ID of B is expected to leave A's root unflagged.
    treeB.Root.ID = 7;
    BinaryGuideTree.CompareTwoTrees(treeA, treeB);
    Assert.IsFalse(treeA.Root.NeedReAlignment);

    // Changing only the ID of node 5 in B is likewise expected to leave the flags clear.
    treeB.Nodes[5].ID = 8;
    BinaryGuideTree.CompareTwoTrees(treeA, treeB);

    for (int index = 0; index < treeA.Nodes.Count; ++index)
    {
        Console.WriteLine(treeA.Nodes[index].ID);
        Console.WriteLine(treeA.Nodes[index].NeedReAlignment);
    }

    for (int index = 0; index < treeB.Nodes.Count; ++index)
    {
        Console.WriteLine(treeB.Nodes[index].ID);
        Console.WriteLine(treeB.Nodes[index].NeedReAlignment);
    }

    Assert.IsFalse(treeA.Nodes[5].NeedReAlignment);
    Assert.IsFalse(treeA.Root.NeedReAlignment);

    // Re-parenting a child of node 5 in B is expected to flag node 5 and the root in A,
    // while node 4 stays unflagged.
    treeB.Nodes[5].LeftChildren = treeB.Nodes[3];
    BinaryGuideTree.CompareTwoTrees(treeA, treeB);
    Assert.IsTrue(treeA.Nodes[5].NeedReAlignment);
    Assert.IsTrue(treeA.Root.NeedReAlignment);
    Assert.IsFalse(treeA.Nodes[4].NeedReAlignment);

    // Test SeparateSequencesByCuttingTree
    // (printed only; the disabled assertions in the original expected 2/2 for edge 3 and 1/3 for edge 2)
    List<int>[] splitAtEdge3 = treeA.SeparateSequencesByCuttingTree(3);
    Console.WriteLine("newSequences[0].Count: {0}", splitAtEdge3[0].Count);
    Console.WriteLine("newSequences[1].Count: {0}", splitAtEdge3[1].Count);

    List<int>[] splitAtEdge2 = treeA.SeparateSequencesByCuttingTree(2);
    Console.WriteLine("newSequences[0].Count: {0}", splitAtEdge2[0].Count);
    Console.WriteLine("newSequences[1].Count: {0}", splitAtEdge2[1].Count);

    // Cut tree test
    var dnaSequences = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAAATCG"),
        new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
        new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
        new Sequence(Alphabets.DNA, "GGGAATCTTATCAG")
    };

    // Generate DistanceMatrix
    var kmerGenerator = new KmerDistanceMatrixGenerator(
        dnaSequences, 2, MoleculeType.DNA, DistanceFunctionTypes.EuclideanDistance);

    // Hierarchical clustering
    IHierarchicalClustering sequenceClustering = new HierarchicalClusteringParallel(
        kmerGenerator.DistanceMatrix, UpdateDistanceMethodsTypes.Average);

    // Generate Guide Tree
    treeA = new BinaryGuideTree(sequenceClustering);

    // CUT Tree
    BinaryGuideTree[] subtrees = treeA.CutTree(3);
    Assert.IsNotNull(subtrees);
}