Example #1
0
        /// <summary>
        /// Runs the progressive aligner (Needleman-Wunsch profile aligner) on three inputs:
        /// three short DNA sequences, whose aligned rows are asserted; ten short DNA sequences,
        /// whose guide tree, sequence weights and alignment are only printed; and the sequences
        /// parsed from a FASTA file, whose guide tree and alignment are only printed.
        /// </summary>
        public void TestProgressiveAligner()
        {
            MsaUtils.SetProfileItemSets(MoleculeType.DNA);

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty = -8;
            int gapExtendPenalty = -1;
            int kmerLength = 4;

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

            // Case 1: three short DNA sequences with a known expected alignment.
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
            };

            var distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);
            BinaryGuideTree tree = new BinaryGuideTree(hierarchicalClustering);

            IProgressiveAligner progressiveAligner = new ProgressiveAligner(
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);

            ISequence[] expectedRows =
            {
                new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---"),
                new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---")
            };
            for (int row = 0; row < expectedRows.Length; ++row)
            {
                Assert.AreEqual(expectedRows[row].ToString(), progressiveAligner.AlignedSequences[row].ToString());
            }

            // Case 2: ten short DNA sequences; results are printed, not asserted.
            // The generator built for case 1 is reused on the new sequence list.
            sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
            };

            distanceGenerator.GenerateDistanceMatrix(sequences);
            hierarchicalClustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);
            tree = new BinaryGuideTree(hierarchicalClustering);

            for (int nodeIndex = 0; nodeIndex < tree.NumberOfNodes; ++nodeIndex)
            {
                Console.WriteLine("Node {0} ID: {1}", nodeIndex, tree.Nodes[nodeIndex].ID);
            }

            for (int edgeIndex = 0; edgeIndex < tree.NumberOfEdges; ++edgeIndex)
            {
                Console.WriteLine("Edge {0} ID: {1}, length: {2}", edgeIndex, tree.Edges[edgeIndex].ID, tree.Edges[edgeIndex].Length);
            }

            SequenceWeighting sequenceWeighting = new SequenceWeighting(tree);
            for (int weightIndex = 0; weightIndex < sequenceWeighting.Weights.Length; ++weightIndex)
            {
                Console.WriteLine("weights {0} is {1}", weightIndex, sequenceWeighting.Weights[weightIndex]);
            }

            progressiveAligner = new ProgressiveAligner(
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            for (int row = 0; row < progressiveAligner.AlignedSequences.Count; ++row)
            {
                Console.WriteLine(progressiveAligner.AlignedSequences[row].ToString());
            }

            // Case 3: sequences read from a FASTA file, scored with BLOSUM62; results are printed only.
            MsaUtils.SetProfileItemSets(MoleculeType.Protein);
            ISequenceParser parser = new FastaParser();
            string filepath = @"testdata\FASTA\Protein\BB11001.tfa";
            IList<ISequence> orgSequences = parser.Parse(filepath);

            sequences = MsaUtils.UnAlign(orgSequences);

            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            kmerLength = 4;
            int numberOfSequences = orgSequences.Count;

            gapOpenPenalty = -13;
            gapExtendPenalty = -5;

            // NOTE(review): the profile item sets were switched to Protein above, yet the k-mer
            // generator is still given MoleculeType.DNA - confirm this is intended.
            distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);
            tree = new BinaryGuideTree(hierarchicalClustering);

            // Nodes at index NumberOfLeaves and above are the ones whose children are printed.
            for (int nodeIndex = tree.NumberOfLeaves; nodeIndex < tree.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(
                    "Node {0}: leftchildren-{1}, rightChildren-{2}",
                    nodeIndex,
                    tree.Nodes[nodeIndex].LeftChildren.ID,
                    tree.Nodes[nodeIndex].RightChildren.ID);
            }

            progressiveAligner = new ProgressiveAligner(
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            for (int row = 0; row < progressiveAligner.AlignedSequences.Count; ++row)
            {
                Console.WriteLine(progressiveAligner.AlignedSequences[row].ToString());
            }
        }
        /// <summary>
        /// Builds hierarchical clusterings (via <c>HierarchicalClusteringParallel</c>) from a
        /// hand-filled 4x4 distance matrix, from the k-mer distances of ten short DNA sequences,
        /// and from the k-mer distances of a FASTA data set. Node count, node IDs and the
        /// NeedReAlignment flags are asserted; everything else is printed.
        /// </summary>
        public void TestHierarchicalClusteringSerial()
        {
            int dimension = 4;
            IDistanceMatrix distanceMatrix = new SymmetricDistanceMatrix(dimension);

            // Fill both triangles so that the distance between i and j is i + j.
            for (int row = 0; row < distanceMatrix.Dimension - 1; ++row)
            {
                for (int col = row + 1; col < distanceMatrix.Dimension; ++col)
                {
                    int distance = row + col;
                    distanceMatrix[row, col] = distance;
                    distanceMatrix[col, row] = distance;
                }
            }

            PAMSAMMultipleSequenceAligner.ParallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };
            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(distanceMatrix);

            Assert.AreEqual(7, hierarchicalClustering.Nodes.Count);

            int expectedNodeCount = dimension * 2 - 1;
            for (int nodeIndex = 0; nodeIndex < expectedNodeCount; ++nodeIndex)
            {
                Assert.AreEqual(nodeIndex, hierarchicalClustering.Nodes[nodeIndex].ID);
            }

            // Print the children of every node from index `dimension` upwards.
            for (int nodeIndex = dimension; nodeIndex < hierarchicalClustering.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(hierarchicalClustering.Nodes[nodeIndex].LeftChildren.ID);
                Console.WriteLine(hierarchicalClustering.Nodes[nodeIndex].RightChildren.ID);
            }

            // Test on sequences
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
            };

            int kmerLength = 4;
            var distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            // Dump the upper triangle of the k-mer distance matrix.
            var kmerDistances = distanceGenerator.DistanceMatrix;
            for (int row = 0; row < kmerDistances.Dimension - 1; ++row)
            {
                for (int col = row + 1; col < kmerDistances.Dimension; ++col)
                {
                    Console.WriteLine("{0}-{1}: {2}", row, col, kmerDistances[row, col]);
                }
            }

            hierarchicalClustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);
            for (int nodeIndex = 0; nodeIndex < hierarchicalClustering.Nodes.Count; ++nodeIndex)
            {
                Assert.AreEqual(true, hierarchicalClustering.Nodes[nodeIndex].NeedReAlignment);
            }

            BinaryGuideTree tree = new BinaryGuideTree(hierarchicalClustering);
            for (int nodeIndex = 0; nodeIndex < tree.Nodes.Count; ++nodeIndex)
            {
                Assert.AreEqual(true, tree.Nodes[nodeIndex].NeedReAlignment);
            }

            // Disabled checks on the children of nodes 4..6, kept for reference:
            //Assert.AreEqual(0, hierarchicalClustering.Nodes[4].LeftChildren.ID);
            //Assert.AreEqual(1, hierarchicalClustering.Nodes[4].RightChildren.ID);
            //Assert.AreEqual(2, hierarchicalClustering.Nodes[5].LeftChildren.ID);
            //Assert.AreEqual(4, hierarchicalClustering.Nodes[5].RightChildren.ID);
            //Assert.AreEqual(3, hierarchicalClustering.Nodes[6].LeftChildren.ID);
            //Assert.AreEqual(5, hierarchicalClustering.Nodes[6].RightChildren.ID);

            // Test on larger dataset
            string filepath = @"TestUtils\Fasta\RV11_BBS_all.afa".TestDir();
            FastAParser parser = new FastAParser();
            IList<ISequence> orgSequences = parser.Parse(filepath).ToList();

            sequences = MsaUtils.UnAlign(orgSequences);

            distanceGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

            // Print the children of every node from index sequences.Count upwards.
            for (int nodeIndex = sequences.Count; nodeIndex < hierarchicalClustering.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(
                    "Node {0}: leftchildren-{1}, rightChildren-{2}",
                    nodeIndex,
                    hierarchicalClustering.Nodes[nodeIndex].LeftChildren.ID,
                    hierarchicalClustering.Nodes[nodeIndex].RightChildren.ID);
            }
        }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// Nothing is returned: the best alignment and its score are stored in
        /// <c>AlignedSequences</c> / <c>AlignmentScore</c>, and the per-stage results in the
        /// corresponding A, B and C members.
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <c>ProfileAlignerName</c> is not one of the handled profile aligners.
        /// </exception>
        private void DoAlignment(IList<ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready: the serial implementation is picked only when the
            // degree of parallelism is exactly 1.
            IProfileAligner profileAligner = null;
            switch (ProfileAlignerName)
            {
                case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;
                case (ProfileAlignerNames.SmithWatermanProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new SmithWatermanProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new SmithWatermanProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;
                default:
                    throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List<ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Faster version: stages 2 and 3 are skipped and simply mirror the stage 1 result.
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB = this.AlignmentScoreA;
                this.AlignmentScoreC = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree binaryGuideTreeB = null;
                IHierarchicalClustering hierarcicalClusteringB = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel
                            (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > this.AlignmentScoreB)
                    {
                        this.AlignmentScoreB = currentScore;
                        this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }

                // STAGE 3
                ReportLog("Stage 3");
                // refinement: at most one pass, and none at all when there are exactly two sequences
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;

                // Work on copies so that the refinement never mutates the current best alignment.
                this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                        {
                            ID = t.ID,
                            // Do not shallow copy dictionary
                            //Metadata = t.Metadata
                        });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        // Cut the tree at this edge, extract one profile per side and re-align them.
                        List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        List<int>[] allIndelPositions;

                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        List<int>[] eStrings = new List<int>[2];

                        // The profile with fewer sequences is always passed as the first argument.
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                ISequence previous = this.AlignedSequencesC[i];
                                List<byte> seqBytes = new List<byte>();

                                // Copy every symbol except those at the positions listed in
                                // allIndelPositions[set].
                                int indexAllIndel = 0;
                                for (int j = 0; j < previous.Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previous[j]);
                                    }
                                }

                                ISequence realigned = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                                // Carry the ID over from the sequence being replaced. Assigning the
                                // ID after overwriting the list slot would be a self-assignment.
                                realigned.ID = previous.ID;
                                this.AlignedSequencesC[i] = realigned;
                                // Do not shallow copy dictionary
                                //(_alignedSequencesC[i] as Sequence).Metadata = _alignedSequencesC[i].Metadata;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                    (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }

                    // No edge improved the score: further passes cannot help.
                    if (!needRefinement)
                    {
                        break;
                    }
                }

                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }
Example #4
0
        /// <summary>
        ///     Builds a hierarchical clustering from a distance matrix (k-mer or Kimura),
        ///     using the default distance method name.
        /// </summary>
        /// <param name="distanceMatrix">Distance matrix to cluster.</param>
        /// <returns>Hierarchical clustering</returns>
        private static IHierarchicalClustering GetHierarchicalClustering(IDistanceMatrix distanceMatrix)
        {
            return new HierarchicalClusteringParallel(distanceMatrix);
        }
Example #5
0
        /// <summary>
        ///     Builds a hierarchical clustering from a distance matrix (k-mer or Kimura)
        ///     with an explicitly chosen hierarchical clustering method.
        /// </summary>
        /// <param name="distanceMatrix">Distance matrix to cluster.</param>
        /// <param name="hierarchicalClusteringMethodName">Hierarchical clustering method name.</param>
        /// <returns>Hierarchical clustering</returns>
        private static IHierarchicalClustering GetHierarchicalClustering(
            IDistanceMatrix distanceMatrix,
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName)
        {
            return new HierarchicalClusteringParallel(distanceMatrix, hierarchicalClusteringMethodName);
        }
Example #6
0
        /// <summary>
        /// Exercises the guide tree types: hand-wired <c>BinaryGuideTreeNode</c> and
        /// <c>BinaryGuideTreeEdge</c> objects, a <c>BinaryGuideTree</c> built from a 4x4 distance
        /// matrix (node/edge/leaf counts, sub-tree extraction, FindSmallestTreeDifference,
        /// CompareTwoTrees, SeparateSequencesByCuttingTree) and CutTree on a tree built from
        /// seven short DNA sequences.
        /// </summary>
        public void TestBinaryGuideTree()
        {
            // Hand-wired five-node tree: nodes 0-2 are leaves, node 3 joins 0 and 1,
            // node 4 joins 3 and 2.
            int numberOfNodes = 5;
            var nodes = new List<BinaryGuideTreeNode>(numberOfNodes);
            for (int id = 0; id < numberOfNodes; ++id)
            {
                nodes.Add(new BinaryGuideTreeNode(id));
            }

            nodes[3].LeftChildren = nodes[0];
            nodes[3].RightChildren = nodes[1];
            nodes[4].LeftChildren = nodes[3];
            nodes[4].RightChildren = nodes[2];

            nodes[0].Parent = nodes[3];
            nodes[1].Parent = nodes[3];
            nodes[2].Parent = nodes[4];
            nodes[3].Parent = nodes[4];

            for (int leaf = 0; leaf < 3; ++leaf)
            {
                Assert.IsFalse(nodes[leaf].IsRoot);
                Assert.IsTrue(nodes[leaf].IsLeaf);
            }

            Assert.IsFalse(nodes[3].IsRoot);
            Assert.IsFalse(nodes[3].IsLeaf);

            Assert.IsTrue(nodes[4].IsRoot);
            Assert.IsFalse(nodes[4].IsLeaf);

            Assert.AreEqual(nodes[3], nodes[0].Parent);

            // Edges are only constructed and wired; nothing is asserted on them.
            int numberOfEdges = 4;
            var edges = new List<BinaryGuideTreeEdge>(numberOfEdges);
            for (int id = 0; id < numberOfEdges; ++id)
            {
                edges.Add(new BinaryGuideTreeEdge(id));
            }

            edges[0].ParentNode = nodes[3];
            edges[0].ChildNode = nodes[0];
            edges[1].ParentNode = nodes[3];
            edges[1].ChildNode = nodes[1];

            edges[2].ParentNode = nodes[4];
            edges[2].ChildNode = nodes[2];
            edges[3].ParentNode = nodes[4];
            edges[3].ChildNode = nodes[3];

            // Tree built from a 4x4 matrix in which the distance between i and j is i + j.
            int dimension = 4;
            IDistanceMatrix distanceMatrix = new SymmetricDistanceMatrix(dimension);
            for (int row = 0; row < distanceMatrix.Dimension - 1; ++row)
            {
                for (int col = row + 1; col < distanceMatrix.Dimension; ++col)
                {
                    int distance = row + col;
                    distanceMatrix[row, col] = distance;
                    distanceMatrix[col, row] = distance;
                }
            }

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = 2 };

            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(distanceMatrix);
            BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

            Assert.AreEqual(7, binaryGuideTree.NumberOfNodes);
            Assert.AreEqual(6, binaryGuideTree.NumberOfEdges);
            Assert.AreEqual(4, binaryGuideTree.NumberOfLeaves);

            var lastNode = binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1];
            Assert.IsTrue(lastNode.IsRoot);

            for (int nodeIndex = 0; nodeIndex < binaryGuideTree.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(binaryGuideTree.Nodes[nodeIndex].ID);
            }

            // Test ExtractSubTreeNodes
            int subTreeSizeAtNode4 = binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[4]).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[4]).Count : {0}", subTreeSizeAtNode4);
            int subTreeSizeAtLastNode = binaryGuideTree.ExtractSubTreeNodes(lastNode).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Count : {0}", subTreeSizeAtLastNode);
            int subTreeSizeAtNode0 = binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[0]).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[0]).Count : {0}", subTreeSizeAtNode0);
            //Assert.AreEqual(3, binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[4]).Count);
            //Assert.AreEqual(7, binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Count);
            //Assert.AreEqual(1, binaryGuideTree.ExtractSubTreeNodes(binaryGuideTree.Nodes[0]).Count);

            // Test ExtractSubTreeLeafNodes
            int leafCountAtNode4 = binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[4]).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[4]).Count : {0}", leafCountAtNode4);
            int leafCountAtLastNode = binaryGuideTree.ExtractSubTreeLeafNodes(lastNode).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Coun : {0}", leafCountAtLastNode);
            int leafCountAtNode0 = binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[0]).Count;
            Console.WriteLine("binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[0]).Count : {0}", leafCountAtNode0);
            //Assert.AreEqual(2, binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[4]).Count);
            //Assert.AreEqual(4, binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[binaryGuideTree.Nodes.Count - 1]).Count);
            //Assert.AreEqual(1, binaryGuideTree.ExtractSubTreeLeafNodes(binaryGuideTree.Nodes[0]).Count);

            // Test FindSmallestTreeDifference: a second tree from the same clustering yields null
            // when compared at the last node, and a non-null node when compared against node 0.
            BinaryGuideTree binaryGuideTreeB = new BinaryGuideTree(hierarchicalClustering);
            var lastNodeB = binaryGuideTreeB.Nodes[binaryGuideTreeB.Nodes.Count - 1];

            BinaryGuideTreeNode node = BinaryGuideTree.FindSmallestTreeDifference(lastNode, lastNodeB);
            Assert.IsNull(node);

            node = BinaryGuideTree.FindSmallestTreeDifference(lastNode, binaryGuideTreeB.Nodes[0]);
            Assert.IsNotNull(node);

            // Test CompareTwoTrees
            for (int nodeIndex = 0; nodeIndex < binaryGuideTree.Nodes.Count; ++nodeIndex)
            {
                Console.Write(binaryGuideTree.Nodes[nodeIndex].ID);
            }

            for (int nodeIndex = 0; nodeIndex < binaryGuideTreeB.Nodes.Count; ++nodeIndex)
            {
                Console.Write(binaryGuideTreeB.Nodes[nodeIndex].ID);
            }

            BinaryGuideTree.CompareTwoTrees(binaryGuideTree, binaryGuideTreeB);

            // Print each node's flag, then clear it; the assertions below run after this reset.
            for (int nodeIndex = 0; nodeIndex < binaryGuideTree.Nodes.Count; ++nodeIndex)
            {
                var current = binaryGuideTree.Nodes[nodeIndex];
                Console.Write(current.ID);
                Console.Write(current.NeedReAlignment);
                current.NeedReAlignment = false;
            }

            for (int nodeIndex = 0; nodeIndex < binaryGuideTreeB.Nodes.Count; ++nodeIndex)
            {
                var current = binaryGuideTreeB.Nodes[nodeIndex];
                Console.Write(current.ID);
                Console.Write(current.NeedReAlignment);
                current.NeedReAlignment = false;
            }

            Assert.IsFalse(binaryGuideTree.Nodes[4].NeedReAlignment);
            Assert.IsFalse(binaryGuideTree.Nodes[5].NeedReAlignment);
            Assert.IsFalse(binaryGuideTree.Nodes[6].NeedReAlignment);

            for (int nodeIndex = binaryGuideTree.NumberOfLeaves; nodeIndex < binaryGuideTree.NumberOfNodes; ++nodeIndex)
            {
                Assert.IsFalse(binaryGuideTree.Nodes[nodeIndex].NeedReAlignment);
            }

            // Changing only node IDs in tree B is expected to leave the flags cleared.
            binaryGuideTreeB.Root.ID = 7;
            BinaryGuideTree.CompareTwoTrees(binaryGuideTree, binaryGuideTreeB);

            Assert.IsFalse(binaryGuideTree.Root.NeedReAlignment);

            binaryGuideTreeB.Nodes[5].ID = 8;
            BinaryGuideTree.CompareTwoTrees(binaryGuideTree, binaryGuideTreeB);

            for (int nodeIndex = 0; nodeIndex < binaryGuideTree.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(binaryGuideTree.Nodes[nodeIndex].ID);
                Console.WriteLine(binaryGuideTree.Nodes[nodeIndex].NeedReAlignment);
            }

            for (int nodeIndex = 0; nodeIndex < binaryGuideTreeB.Nodes.Count; ++nodeIndex)
            {
                Console.WriteLine(binaryGuideTreeB.Nodes[nodeIndex].ID);
                Console.WriteLine(binaryGuideTreeB.Nodes[nodeIndex].NeedReAlignment);
            }

            Assert.IsFalse(binaryGuideTree.Nodes[5].NeedReAlignment);
            Assert.IsFalse(binaryGuideTree.Root.NeedReAlignment);

            // Re-wiring a child of node 5 in tree B is expected to flag node 5 and the root, but not node 4.
            binaryGuideTreeB.Nodes[5].LeftChildren = binaryGuideTreeB.Nodes[3];
            BinaryGuideTree.CompareTwoTrees(binaryGuideTree, binaryGuideTreeB);
            Assert.IsTrue(binaryGuideTree.Nodes[5].NeedReAlignment);
            Assert.IsTrue(binaryGuideTree.Root.NeedReAlignment);
            Assert.IsFalse(binaryGuideTree.Nodes[4].NeedReAlignment);

            // Test SeparateSequencesByCuttingTree
            List<int>[] splitAtEdge3 = binaryGuideTree.SeparateSequencesByCuttingTree(3);
            //Assert.AreEqual(2, newSequences[0].Count);
            //Assert.AreEqual(2, newSequences[1].Count);
            Console.WriteLine("newSequences[0].Count: {0}", splitAtEdge3[0].Count);
            Console.WriteLine("newSequences[1].Count: {0}", splitAtEdge3[1].Count);

            List<int>[] splitAtEdge2 = binaryGuideTree.SeparateSequencesByCuttingTree(2);
            //Assert.AreEqual(1, newSequencesB[0].Count);
            //Assert.AreEqual(3, newSequencesB[1].Count);
            Console.WriteLine("newSequences[0].Count: {0}", splitAtEdge2[0].Count);
            Console.WriteLine("newSequences[1].Count: {0}", splitAtEdge2[1].Count);

            // Cut tree test
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG")
            };

            // Generate DistanceMatrix
            var distanceGenerator = new KmerDistanceMatrixGenerator(
                sequences, 2, MoleculeType.DNA, DistanceFunctionTypes.EuclideanDistance);

            // Hierarchical clustering
            IHierarchicalClustering sequenceClustering = new HierarchicalClusteringParallel(
                distanceGenerator.DistanceMatrix, UpdateDistanceMethodsTypes.Average);

            // Generate Guide Tree
            binaryGuideTree = new BinaryGuideTree(sequenceClustering);

            // CUT Tree
            BinaryGuideTree[] subtrees = binaryGuideTree.CutTree(3);

            Assert.IsNotNull(subtrees);
        }