Ejemplo n.º 1
0
        // Aligns three short DNA sequences with the progressive aligner, using a guide
        // tree derived from k-mer distances and serial hierarchical clustering, and
        // compares every aligned row with its expected gapped string.
        public void TestProgressiveAligner()
        {
            // Map each symbol of the template sequence to its position and publish
            // the map as the profile item set.
            ISequence symbolTemplate = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            var symbolPositions = new Dictionary<ISequenceItem, int>();
            int position = 0;
            while (position < symbolTemplate.Count)
            {
                symbolPositions.Add(symbolTemplate[position], position);
                position++;
            }

            Profiles.ItemSet = symbolPositions;

            // Scoring configuration.
            var scoring = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);
            int openPenalty = -8;
            int extendPenalty = -1;
            int wordLength = 3;

            // Input sequences, in the order the aligned rows are checked below.
            var inputs = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
            };

            // k-mer distances -> hierarchical clustering -> binary guide tree.
            var distanceGenerator = new KmerDistanceMatrixGenerator(inputs, wordLength, MoleculeType.DNA);
            distanceGenerator.GenerateDistanceMatrix(inputs);

            IHierarchicalClustering clustering = new HierarchicalClusteringSerial(distanceGenerator.DistanceMatrix);
            var guideTree = new BinaryGuideTree(clustering);

            IProgressiveAligner aligner = new ProgressiveAligner(
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                scoring,
                openPenalty,
                extendPenalty);
            aligner.Align(inputs, guideTree);

            // Expected rows are built as sequences and compared through ToString().
            ISequence[] expectedRows =
            {
                new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---"),
                new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---"),
            };

            for (int row = 0; row < expectedRows.Length; ++row)
            {
                Assert.AreEqual(expectedRows[row].ToString(), aligner.AlignedSequences[row].ToString());
            }
        }
Ejemplo n.º 2
0
        // Exercises the progressive aligner in three phases:
        //   1. three DNA sequences, asserted against the expected gapped rows;
        //   2. ten DNA sequences, with guide tree, sequence weights and alignment printed;
        //   3. sequences parsed from a protein FASTA file, with tree topology and alignment printed.
        // Only phase 1 asserts; phases 2 and 3 just write their results to the console.
        public void TestProgressiveAligner()
        {
            // ---- Phase 1: small DNA alignment with exact expectations ----
            MsaUtils.SetProfileItemSets(MoleculeType.DNA);

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -8;
            int gapExtendPenalty = -1;
            int kmerLength       = 4;

            // Cap the degree of parallelism used by the aligner components at 2.
            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };

            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
            };

            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

            BinaryGuideTree tree = new BinaryGuideTree(hierarchicalClustering);

            IProgressiveAligner progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            progressiveAligner.Align(sequences, tree);

            ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT");
            ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");
            ISequence expectedSeqC = new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---");

            Assert.AreEqual(expectedSeqA.ToString(), progressiveAligner.AlignedSequences[0].ToString());
            Assert.AreEqual(expectedSeqB.ToString(), progressiveAligner.AlignedSequences[1].ToString());
            Assert.AreEqual(expectedSeqC.ToString(), progressiveAligner.AlignedSequences[2].ToString());

            // ---- Phase 2: ten DNA sequences, results printed only ----
            sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
            };

            // The generator created in phase 1 is reused; it is handed the new list here.
            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

            tree = new BinaryGuideTree(hierarchicalClustering);

            for (int i = 0; i < tree.NumberOfNodes; ++i)
            {
                Console.WriteLine("Node {0} ID: {1}", i, tree.Nodes[i].ID);
            }
            for (int i = 0; i < tree.NumberOfEdges; ++i)
            {
                Console.WriteLine("Edge {0} ID: {1}, length: {2}", i, tree.Edges[i].ID, tree.Edges[i].Length);
            }

            SequenceWeighting sw = new SequenceWeighting(tree);

            for (int i = 0; i < sw.Weights.Length; ++i)
            {
                Console.WriteLine("weights {0} is {1}", i, sw.Weights[i]);
            }

            progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
            {
                Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
            }

            // ---- Phase 3: protein sequences read from a FASTA file, results printed only ----
            MsaUtils.SetProfileItemSets(MoleculeType.Protein);
            ISequenceParser  parser       = new FastaParser();
            string           filepath     = @"testdata\FASTA\Protein\BB11001.tfa";
            IList<ISequence> orgSequences = parser.Parse(filepath);

            // Strip the alignment from the parsed sequences before re-aligning them.
            sequences = MsaUtils.UnAlign(orgSequences);

            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            kmerLength       = 4;

            gapOpenPenalty   = -13;
            gapExtendPenalty = -5;

            // The input is protein (protein item sets and BLOSUM62 above), so the
            // k-mer generator is configured for protein rather than DNA.
            kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.Protein);

            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);

            tree = new BinaryGuideTree(hierarchicalClustering);

            // Start at NumberOfLeaves so that only nodes past the leaves are printed with their children.
            for (int i = tree.NumberOfLeaves; i < tree.Nodes.Count; ++i)
            {
                Console.WriteLine("Node {0}: leftchildren-{1}, rightChildren-{2}", i, tree.Nodes[i].LeftChildren.ID, tree.Nodes[i].RightChildren.ID);
            }
            progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            for (int i = 0; i < progressiveAligner.AlignedSequences.Count; ++i)
            {
                Console.WriteLine(progressiveAligner.AlignedSequences[i].ToString());
            }
        }
        // Checks hierarchical clustering (HierarchicalClusteringParallel) on three inputs:
        //   1. a hand-filled 4x4 symmetric distance matrix, asserting node count and IDs;
        //   2. k-mer distances over ten DNA sequences, asserting NeedReAlignment on every
        //      clustering node and every guide-tree node;
        //   3. a larger FASTA data set, where the merged nodes are only printed.
        public void TestHierarchicalClusteringSerial()
        {
            int dimension = 4;
            int expectedNodeCount = dimension * 2 - 1;
            IDistanceMatrix handMadeMatrix = new SymmetricDistanceMatrix(dimension);

            // Fill both triangles so that the distance between r and c is r + c.
            for (int r = 0; r < handMadeMatrix.Dimension - 1; ++r)
            {
                for (int c = r + 1; c < handMadeMatrix.Dimension; ++c)
                {
                    int distance = r + c;
                    handMadeMatrix[r, c] = distance;
                    handMadeMatrix[c, r] = distance;
                }
            }

            PAMSAMMultipleSequenceAligner.ParallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };
            IHierarchicalClustering clustering = new HierarchicalClusteringParallel(handMadeMatrix);

            Assert.AreEqual(expectedNodeCount, clustering.Nodes.Count);
            for (int id = 0; id < expectedNodeCount; ++id)
            {
                Assert.AreEqual(id, clustering.Nodes[id].ID);
            }

            // Print the children of every node past the first `dimension` entries.
            for (int n = dimension; n < clustering.Nodes.Count; ++n)
            {
                var merged = clustering.Nodes[n];
                Console.WriteLine(merged.LeftChildren.ID);
                Console.WriteLine(merged.RightChildren.ID);
            }

            // Test on sequences
            string[] residues =
            {
                "GGGAAAAATCAGATT",
                "GGGAATCAAAATCAG",
                "GGGAATCAAAATCAG",
                "GGGAAATCG",
                "GGGAATCAATCAG",
                "GGGAATCTTATCAG",
                "GGGACAAAATCAG",
                "GGGAAAAATCAGATT",
                "GGGAATCAAAATCAG",
                "GGGACAAAATCAG",
            };
            List<ISequence> sequences = new List<ISequence>();
            foreach (string text in residues)
            {
                sequences.Add(new Sequence(Alphabets.DNA, text));
            }

            int kmerLength = 4;
            KmerDistanceMatrixGenerator distanceGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            // Dump the upper triangle of the generated distance matrix.
            var kmerDistances = distanceGenerator.DistanceMatrix;
            for (int r = 0; r < kmerDistances.Dimension - 1; ++r)
            {
                for (int c = r + 1; c < kmerDistances.Dimension; ++c)
                {
                    Console.WriteLine("{0}-{1}: {2}", r, c, kmerDistances[r, c]);
                }
            }

            clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);
            for (int n = 0; n < clustering.Nodes.Count; ++n)
            {
                Assert.AreEqual(true, clustering.Nodes[n].NeedReAlignment);
            }

            BinaryGuideTree guideTree = new BinaryGuideTree(clustering);
            for (int n = 0; n < guideTree.Nodes.Count; ++n)
            {
                Assert.AreEqual(true, guideTree.Nodes[n].NeedReAlignment);
            }

            // Test on larger dataset
            string fastaPath = @"TestUtils\Fasta\RV11_BBS_all.afa".TestDir();
            FastAParser fastaParser = new FastAParser();
            IList<ISequence> parsedSequences = fastaParser.Parse(fastaPath).ToList();

            sequences = MsaUtils.UnAlign(parsedSequences);

            distanceGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);
            distanceGenerator.GenerateDistanceMatrix(sequences);

            clustering = new HierarchicalClusteringParallel(distanceGenerator.DistanceMatrix);

            // Nodes from index sequences.Count onward are printed with their two children.
            for (int n = sequences.Count; n < clustering.Nodes.Count; ++n)
            {
                var merged = clustering.Nodes[n];
                Console.WriteLine("Node {0}: leftchildren-{1}, rightChildren-{2}", n, merged.LeftChildren.ID, merged.RightChildren.ID);
            }
        }