Ejemplo n.º 1
0
        /// <summary>
        /// Regression test: un-aligns the sequences read from the 122_raw.afa DNA
        /// benchmark file, runs the PAMSAM aligner over them and checks that the
        /// aligner exposes a non-null set of aligned sequences.
        /// </summary>
        public void testBug2()
        {
            // Read the DNA benchmark dataset and reduce it to un-aligned sequences.
            string      benchmarkFile = @"TestUtils\122_raw.afa".TestDir();
            FastAParser fastaReader   = new FastAParser();

            IList<ISequence> parsedSequences = fastaReader.Parse(benchmarkFile).ToList();
            List<ISequence>  rawSequences    = MsaUtils.UnAlign(parsedSequences);

            // Aligner switches that are set on the aligner type itself.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            // Scoring and partitioning parameters (fixed values rather than
            // values derived from Environment.ProcessorCount).
            int openPenalty    = -13;
            int extendPenalty  = -5;
            int wordLength     = 2;
            int parallelDegree = 2;
            int partitionCount = 16;

            SimilarityMatrix dnaMatrix =
                new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

            PAMSAMMultipleSequenceAligner aligner = new PAMSAMMultipleSequenceAligner(
                rawSequences,
                wordLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProductFast,
                dnaMatrix,
                openPenalty,
                extendPenalty,
                partitionCount,
                parallelDegree);

            Assert.IsNotNull(aligner.AlignedSequences);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Regression test: parses the 122_raw.afa DNA benchmark file, un-aligns the
        /// sequences, runs the PAMSAM aligner with an explicit molecule type and checks
        /// that the aligner exposes a non-null set of aligned sequences.
        /// </summary>
        /// <exception cref="InvalidDataException">
        /// Thrown when the molecule type has no matching similarity matrix.
        /// </exception>
        public void testBug3()
        {
            //Test on DNA benchmark dataset
            ISequenceParser parser = new FastaParser();

            try
            {
                string filepath = @"TestUtils\122_raw.afa";

                MoleculeType mt = MoleculeType.DNA;

                IList<ISequence> orgSequences = parser.Parse(filepath);

                List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                PAMSAMMultipleSequenceAligner.FasterVersion = false;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = false;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 2;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName               = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName       = ProfileScoreFunctionNames.InnerProductFast;

                // Pick the similarity matrix that matches the molecule type.
                SimilarityMatrix similarityMatrix = null;

                switch (mt)
                {
                case (MoleculeType.DNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                    break;

                case (MoleculeType.RNA):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                    break;

                case (MoleculeType.Protein):
                    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                    break;

                default:
                    throw new InvalidDataException("Invalid molecular type");
                }

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Assert.IsNotNull(msa.AlignedSequences);
            }
            finally
            {
                // Release the parser even when parsing, alignment or the assertion
                // throws; previously it was only disposed on the success path.
                ((FastaParser)parser).Dispose();
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Aligns two short DNA sequences with <c>NeedlemanWunschProfileAligner</c>,
        /// rebuilds the gapped sequences from the resulting e-strings and checks both
        /// the gapped strings and the multiple-alignment score.
        /// </summary>
        public void TestNeedlemanWunschProfileAligner()
        {
            // Register each symbol of the template with its position in the template.
            ISequence symbolTemplate = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary<ISequenceItem, int> symbolPositions = new Dictionary<ISequenceItem, int>();

            for (int position = 0; position < symbolTemplate.Count; position++)
            {
                symbolPositions.Add(symbolTemplate[position], position);
            }
            Profiles.ItemSet = symbolPositions;

            // Configure the aligner through its interface properties.
            IProfileAligner  aligner   = new NeedlemanWunschProfileAligner();
            SimilarityMatrix dnaMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrices.AmbiguousDna);
            int openPenalty   = -8;
            int extendPenalty = -1;

            aligner.SimilarityMatrix = dnaMatrix;
            aligner.GapOpenCost      = openPenalty;
            aligner.GapExtensionCost = extendPenalty;

            ISequence first  = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence second = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
            List<ISequence> inputs = new List<ISequence> { first, second };

            // Align the two single-sequence profiles.
            IProfileAlignment firstProfile  = ProfileAlignment.GenerateProfileAlignment(inputs[0]);
            IProfileAlignment secondProfile = ProfileAlignment.GenerateProfileAlignment(inputs[1]);

            aligner.Align(firstProfile, secondProfile);

            // Turn each aligned profile into an e-string and re-apply it to its input.
            List<int> firstEString  = aligner.GenerateEString(aligner.AlignedA);
            List<int> secondEString = aligner.GenerateEString(aligner.AlignedB);

            List<ISequence> gapped = new List<ISequence>();
            gapped.Add(aligner.GenerateSequenceFromEString(firstEString, inputs[0]));
            gapped.Add(aligner.GenerateSequenceFromEString(secondEString, inputs[1]));

            float score = MsaUtils.MultipleAlignmentScoreFunction(gapped, dnaMatrix, openPenalty, extendPenalty);

            ISequence expectedFirst  = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
            ISequence expectedSecond = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

            Assert.AreEqual(expectedFirst.ToString(), gapped[0].ToString());
            Assert.AreEqual(expectedSecond.ToString(), gapped[1].ToString());

            Assert.AreEqual(40, score);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Exercises k-mer counting, <c>KmerDistanceScoreCalculator</c> and
        /// <c>KmerDistanceMatrixGenerator</c> on small DNA inputs for the default,
        /// CoVariance, ModifiedMUSCLE, PearsonCorrelation and EuclideanDistance
        /// distance functions. Only the k-mer counts of the first two sequences are
        /// asserted; scores and matrices are written to the console for inspection.
        /// </summary>
        public void TestKimuraDistanceMatrixGenerator()
        {
            List<ISequence> sequences = new List<ISequence>();

            sequences.Add(new Sequence(Alphabets.DNA, "ACGTAA"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));

            int kmerLength = 3;

            // test kmer counting
            KmerDistanceScoreCalculator kmerDistanceScoreCalculator =
                new KmerDistanceScoreCalculator(kmerLength, MoleculeType.DNA);

            Dictionary<String, float> countDictionaryA =
                KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[0], kmerLength);
            Dictionary<String, float> countDictionaryB =
                KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[1], kmerLength);

            Dictionary<String, float> expectedCountDictionaryA = new Dictionary<String, float>();

            expectedCountDictionaryA.Add("ACG", 1);
            expectedCountDictionaryA.Add("CGT", 1);
            expectedCountDictionaryA.Add("GTA", 1);
            expectedCountDictionaryA.Add("TAA", 1);

            AssertExpectedKmerCounts(expectedCountDictionaryA, countDictionaryA);

            Dictionary<String, float> expectedCountDictionaryB = new Dictionary<String, float>();

            expectedCountDictionaryB.Add("GGG", 1);
            expectedCountDictionaryB.Add("GGA", 1);
            expectedCountDictionaryB.Add("GAA", 1);
            expectedCountDictionaryB.Add("AAT", 2);
            expectedCountDictionaryB.Add("ATC", 2);
            expectedCountDictionaryB.Add("TCA", 2);
            expectedCountDictionaryB.Add("CAA", 1);
            expectedCountDictionaryB.Add("CAG", 1);

            AssertExpectedKmerCounts(expectedCountDictionaryB, countDictionaryB);

            WriteKmerCounts(countDictionaryA);
            WriteKmerCounts(countDictionaryB);

            float distanceScore = kmerDistanceScoreCalculator.CalculateDistanceScore(countDictionaryA, countDictionaryB);

            Console.WriteLine(distanceScore);

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };
            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            WritePairwiseKmerDistances(kmerDistanceMatrixGenerator, sequences.Count);

            // test kmer counting CoVariance
            WriteKmerDistancesForFunction(sequences, kmerLength, DistanceFunctionTypes.CoVariance);

            // test kmer counting ModifiedMUSCLE
            WriteKmerDistancesForFunction(sequences, kmerLength, DistanceFunctionTypes.ModifiedMUSCLE);

            // test kmer counting PearsonCorrelation
            WriteKmerDistancesForFunction(sequences, kmerLength, DistanceFunctionTypes.PearsonCorrelation);

            // Test for case 2
            sequences.Clear();
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));

            // test kmer counting
            countDictionaryA = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[0], kmerLength);
            countDictionaryB = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[1], kmerLength);

            WriteKmerCounts(countDictionaryA);
            WriteKmerCounts(countDictionaryB);

            distanceScore = kmerDistanceScoreCalculator.CalculateDistanceScore(countDictionaryA, countDictionaryB);
            Console.WriteLine(distanceScore);

            kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);

            WritePairwiseKmerDistances(kmerDistanceMatrixGenerator, sequences.Count);

            // Test on larger dataset
            sequences = new List<ISequence>();
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAATCG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"));
            sequences.Add(new Sequence(Alphabets.DNA, "GGGACAAAATCAG"));

            kmerLength = 4;
            kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA, DistanceFunctionTypes.EuclideanDistance);

            kmerDistanceScoreCalculator = new KmerDistanceScoreCalculator(kmerLength, MoleculeType.DNA, DistanceFunctionTypes.EuclideanDistance);

            // Print the score recomputed from normalized counts next to the matrix
            // entry for the same pair so the two can be compared by eye.
            for (int i = 0; i < kmerDistanceMatrixGenerator.DistanceMatrix.Dimension - 1; ++i)
            {
                for (int j = i + 1; j < kmerDistanceMatrixGenerator.DistanceMatrix.Dimension; ++j)
                {
                    countDictionaryA = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[i], kmerLength);
                    countDictionaryB = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[j], kmerLength);
                    MsaUtils.Normalize(countDictionaryA);
                    MsaUtils.Normalize(countDictionaryB);
                    float score = kmerDistanceScoreCalculator.CalculateDistanceScore(countDictionaryA, countDictionaryB);
                    Console.WriteLine("{0}-{1}: {2}", i, j, score);
                    Console.WriteLine("{0}-{1}: {2}", i, j, kmerDistanceMatrixGenerator.DistanceMatrix[i, j]);
                    // Assert.AreEqual(score, kmerDistanceMatrixGenerator.DistanceMatrix[i, j]);
                }
            }
        }

        /// <summary>
        /// Asserts that every k-mer listed in <paramref name="expectedCounts"/> has the
        /// same count in <paramref name="actualCounts"/>. K-mers that appear only in
        /// <paramref name="actualCounts"/> are not checked.
        /// </summary>
        private static void AssertExpectedKmerCounts(Dictionary<String, float> expectedCounts, Dictionary<String, float> actualCounts)
        {
            foreach (KeyValuePair<String, float> expected in expectedCounts)
            {
                // Expected value goes first so a failure message labels the values correctly.
                Assert.AreEqual(expected.Value, actualCounts[expected.Key]);
            }
        }

        /// <summary>
        /// Writes each k-mer as space-separated characters followed by its count.
        /// </summary>
        private static void WriteKmerCounts(Dictionary<String, float> kmerCounts)
        {
            foreach (var pair in kmerCounts)
            {
                foreach (char s in pair.Key)
                {
                    Console.Write(s + " ");
                }
                Console.WriteLine(pair.Value);
            }
        }

        /// <summary>
        /// Writes the distance-matrix entry for every pair (i, j) with i &lt; j among the
        /// first <paramref name="sequenceCount"/> sequences.
        /// </summary>
        private static void WritePairwiseKmerDistances(KmerDistanceMatrixGenerator generator, int sequenceCount)
        {
            for (int i = 0; i < sequenceCount - 1; ++i)
            {
                for (int j = i + 1; j < sequenceCount; ++j)
                {
                    Console.WriteLine("Kmer Distance of sequence {0}, and {1} is: {2}", i, j, generator.DistanceMatrix[i, j]);
                }
            }
        }

        /// <summary>
        /// Writes the distance score between the first two sequences and the full
        /// pairwise distance matrix, both computed with <paramref name="distanceFunction"/>.
        /// </summary>
        private static void WriteKmerDistancesForFunction(List<ISequence> sequences, int kmerLength, DistanceFunctionTypes distanceFunction)
        {
            KmerDistanceScoreCalculator calculator = new KmerDistanceScoreCalculator(kmerLength, MoleculeType.DNA, distanceFunction);

            Dictionary<String, float> countsA = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[0], kmerLength);
            Dictionary<String, float> countsB = KmerDistanceScoreCalculator.CalculateKmerCounting(sequences[1], kmerLength);

            float distanceScore = calculator.CalculateDistanceScore(countsA, countsB);
            Console.WriteLine(distanceScore);

            KmerDistanceMatrixGenerator generator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA, distanceFunction);

            WritePairwiseKmerDistances(generator, sequences.Count);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Performance run for the PAMSAM aligner: reads the reference and query file
        /// paths from the XML configuration, aligns the un-aligned reference sequences,
        /// and reports elapsed time, managed-memory delta, alignment score and the
        /// aligned sequences.
        /// </summary>
        public void PerformPAMSAMPerf()
        {
            Stopwatch _watchObj = new Stopwatch();

            // Get input values from XML.
            string refPath =
                Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
                                              Constants.RefFilePathNode);
            string queryPath =
                Utility._xmlUtil.GetTextValue(Constants.PamsamNode,
                                              Constants.QueryFilePathNode);

            // Create a List for input files.
            List<string> lstInputFiles = new List<string>();

            lstInputFiles.Add(refPath);
            lstInputFiles.Add(queryPath);

            ISequenceParser parser = new FastaParser();

            try
            {
                // Parse the query file as before; its sequences are not used below,
                // so the result is no longer kept in a local.
                parser.Parse(queryPath);
                IList<ISequence> orgSequences = parser.Parse(refPath);

                // Execute UnAlign method to verify that it does not contains gap
                List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                // Set static properties
                PAMSAMMultipleSequenceAligner.FasterVersion = true;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = false;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

                // Set Alignment parameters.
                int gapOpenPenalty     = -13;
                int gapExtendPenalty   = -5;
                int kmerLength         = 2;
                int numberOfDegrees    = 2;
                int numberOfPartitions = 4;

                // Profile Distance function name
                DistanceFunctionTypes distanceFunctionName =
                    DistanceFunctionTypes.EuclideanDistance;

                // Set Hierarchical clustering.
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName =
                    UpdateDistanceMethodsTypes.Average;

                // Set NeedlemanWunschProfileAligner
                ProfileAlignerNames profileAlignerName =
                    ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames profileProfileFunctionName =
                    ProfileScoreFunctionNames.InnerProduct;

                // Create similarity matrix instance.
                SimilarityMatrix similarityMatrix =
                    new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

                // Take the memory baseline BEFORE starting the timer: passing true
                // lets GetTotalMemory wait for a collection, and that wait must not
                // be counted as alignment time.
                long memoryStart = GC.GetTotalMemory(true);

                // Reset stop watch and start timer.
                _watchObj.Reset();
                _watchObj.Start();

                // Parallel Option will only get set if the PAMSAMMultipleSequenceAligner is getting called
                // To test separately distance matrix, binary tree etc..
                // Set the parallel option using below ctor.
                msa = new PAMSAMMultipleSequenceAligner
                          (sequences, MoleculeType.DNA, kmerLength, distanceFunctionName,
                          hierarchicalClusteringMethodName, profileAlignerName,
                          profileProfileFunctionName, similarityMatrix, gapOpenPenalty,
                          gapExtendPenalty, numberOfPartitions, numberOfDegrees);

                // Stop the timer before the second (possibly blocking) memory reading.
                _watchObj.Stop();
                long memoryEnd = GC.GetTotalMemory(true);

                string memoryUsed = (memoryEnd - memoryStart).ToString();

                // Display all aligned sequence, performance and memory optimization nos.
                DisplayTestCaseHeader(lstInputFiles, _watchObj,
                                      memoryUsed, "PAMSAM");

                Console.WriteLine(
                    "PAMSAM SequenceAligner method, Alignment Score is : {0}",
                    msa.AlignmentScore.ToString());

                int index = 0;

                foreach (ISequence seq in msa.AlignedSequences)
                {
                    Console.WriteLine("PAMSAM Aligned Seq {0}:{1}", index, seq.ToString());
                    index++;
                }
            }
            finally
            {
                // Release the parser if it is disposable; previously it was never
                // released. The 'as' cast compiles whether or not it implements IDisposable.
                IDisposable disposableParser = parser as IDisposable;

                if (disposableParser != null)
                {
                    disposableParser.Dispose();
                }
            }
        }
        /// <summary>
        /// Builds hierarchical clusterings with <c>HierarchicalClusteringParallel</c>
        /// from a hand-filled 4x4 distance matrix, from a k-mer distance matrix over
        /// ten short DNA sequences, and from the RV11_BBS_all.afa file. Asserts node
        /// count and node IDs for the first case and the <c>NeedReAlignment</c> flag of
        /// every node for the second; the remaining results are written to the console.
        /// </summary>
        public void TestHierarchicalClusteringSerial()
        {
            int             dimension = 4;
            IDistanceMatrix matrix    = new SymmetricDistanceMatrix(dimension);

            // Fill both triangles with distance(row, col) = row + col.
            for (int row = 0; row < matrix.Dimension - 1; row++)
            {
                for (int col = row + 1; col < matrix.Dimension; col++)
                {
                    int pairSum = row + col;
                    matrix[row, col] = pairSum;
                    matrix[col, row] = pairSum;
                }
            }

            PAMSAMMultipleSequenceAligner.ParallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };
            IHierarchicalClustering clustering = new HierarchicalClusteringParallel(matrix);

            Assert.AreEqual(7, clustering.Nodes.Count);

            int expectedNodeCount = dimension * 2 - 1;
            for (int nodeIndex = 0; nodeIndex < expectedNodeCount; nodeIndex++)
            {
                Assert.AreEqual(nodeIndex, clustering.Nodes[nodeIndex].ID);
            }

            // Dump the children of every node from index 'dimension' onwards.
            for (int nodeIndex = dimension; nodeIndex < clustering.Nodes.Count; nodeIndex++)
            {
                Console.WriteLine(clustering.Nodes[nodeIndex].LeftChildren.ID);
                Console.WriteLine(clustering.Nodes[nodeIndex].RightChildren.ID);
            }

            // Test on sequences
            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
            ISequence seqC = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");
            List<ISequence> sequences = new List<ISequence>
            {
                seqA,
                seqB,
                seqC,
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG")
            };

            int kmerLength = 4;
            KmerDistanceMatrixGenerator generator =
                new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);

            generator.GenerateDistanceMatrix(sequences);

            // Write the upper triangle of the k-mer distance matrix.
            for (int row = 0; row < generator.DistanceMatrix.Dimension - 1; row++)
            {
                for (int col = row + 1; col < generator.DistanceMatrix.Dimension; col++)
                {
                    Console.WriteLine("{0}-{1}: {2}", row, col, generator.DistanceMatrix[row, col]);
                }
            }

            clustering = new HierarchicalClusteringParallel(generator.DistanceMatrix);
            for (int nodeIndex = 0; nodeIndex < clustering.Nodes.Count; nodeIndex++)
            {
                Assert.AreEqual(true, clustering.Nodes[nodeIndex].NeedReAlignment);
            }

            BinaryGuideTree guideTree = new BinaryGuideTree(clustering);

            for (int nodeIndex = 0; nodeIndex < guideTree.Nodes.Count; nodeIndex++)
            {
                Assert.AreEqual(true, guideTree.Nodes[nodeIndex].NeedReAlignment);
            }

            // Disabled checks on the tree topology, kept for reference:
            // SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            //Assert.AreEqual(0, hierarchicalClustering.Nodes[4].LeftChildren.ID);
            //Assert.AreEqual(1, hierarchicalClustering.Nodes[4].RightChildren.ID);
            //Assert.AreEqual(2, hierarchicalClustering.Nodes[5].LeftChildren.ID);
            //Assert.AreEqual(4, hierarchicalClustering.Nodes[5].RightChildren.ID);
            //Assert.AreEqual(3, hierarchicalClustering.Nodes[6].LeftChildren.ID);
            //Assert.AreEqual(5, hierarchicalClustering.Nodes[6].RightChildren.ID);

            // Test on larger dataset
            string           datasetPath = @"TestUtils\Fasta\RV11_BBS_all.afa".TestDir();
            FastAParser      fastaReader = new FastAParser();
            IList<ISequence> parsed      = fastaReader.Parse(datasetPath).ToList();

            sequences = MsaUtils.UnAlign(parsed);

            generator = new KmerDistanceMatrixGenerator(sequences, kmerLength, Alphabets.AmbiguousDNA);
            generator.GenerateDistanceMatrix(sequences);

            clustering = new HierarchicalClusteringParallel(generator.DistanceMatrix);

            // Write the children of every node from index sequences.Count onwards.
            for (int nodeIndex = sequences.Count; nodeIndex < clustering.Nodes.Count; nodeIndex++)
            {
                Console.WriteLine(
                    "Node {0}: leftchildren-{1}, rightChildren-{2}",
                    nodeIndex,
                    clustering.Nodes[nodeIndex].LeftChildren.ID,
                    clustering.Nodes[nodeIndex].RightChildren.ID);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Runs <c>NeedlemanWunschProfileAlignerSerial</c> on one hand-written pair of
        /// DNA sequences and then on pairs of sequences read from
        /// RV11_BBS_allSmall.afa, writing e-strings, alignment scores and aligned
        /// sequences to the console. The test contains no assertions.
        /// </summary>
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            ISequence templateSequence    = new Sequence(Alphabets.AmbiguousDNA, "ATGCSWRYKMBVHDN-");
            Dictionary<byte, int> itemSet = new Dictionary<byte, int>();

            // Map each template symbol, and the lower-case form of each letter,
            // to the symbol's position in the template.
            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);

                if (char.IsLetter((char)templateSequence[i]))
                {
                    itemSet.Add((byte)char.ToLower((char)templateSequence[i]), i);
                }
            }
            Profiles.ItemSet = itemSet;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List<ISequence> sequences = new List<ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);

            List<int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List<int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List<ISequence> alignedSequences = new List<ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(new string(alignedSequences[i].Select(a => (char)a).ToArray()));
            }

            // Test on case 3: 36 sequences
            string      filepath    = @"\TestUtils\RV11_BBS_allSmall.afa";
            string      filePathObj = Directory.GetCurrentDirectory() + filepath;
            FastAParser parser      = new FastAParser(filePathObj);

            try
            {
                IList<ISequence> orgSequences = parser.Parse().ToList();

                sequences = MsaUtils.UnAlign(orgSequences);

                int numberOfSequences = orgSequences.Count;

                Console.WriteLine("Original unaligned sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(">");
                    Console.WriteLine(new string(sequences[i].Select(a => (char)a).ToArray()));
                }

                // NOTE(review): the outer loop starts at index 1, so sequence 0 is never
                // part of a pair — confirm whether that is intentional.
                for (int i = 1; i < numberOfSequences - 1; ++i)
                {
                    for (int j = i + 1; j < numberOfSequences; ++j)
                    {
                        profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                        profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                        profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                 gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                        profileAligner.Align(profileAlignmentA, profileAlignmentB);

                        eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                        eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                        Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                        Console.WriteLine("estring 1:");
                        for (int k = 0; k < eStringSubtree.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtree[k]);
                        }
                        Console.WriteLine("\nestring 2:");
                        for (int k = 0; k < eStringSubtreeB.Count; ++k)
                        {
                            Console.Write("{0}\t", eStringSubtreeB[k]);
                        }

                        alignedSequences = new List<ISequence>();

                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                        alignedSequences.Add(seq);
                        seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                        alignedSequences.Add(seq);

                        profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                        Console.WriteLine("\nalignment score is: {0}", profileScore);

                        Console.WriteLine("the aligned sequences are:");
                        for (int k = 0; k < alignedSequences.Count; ++k)
                        {
                            Console.WriteLine(new string(alignedSequences[k].Select(a => (char)a).ToArray()));
                        }
                    }
                }
            }
            finally
            {
                // Dispose exactly once, after all pairs are processed or on failure.
                // The original call sat inside the outer loop, so it ran once per
                // outer iteration and not at all when the loop body never executed.
                parser.Dispose();
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Exercises the MsaUtils scoring, offset, Q/TC and quick-sort helpers on small
        /// hand-written inputs and prints the results to the console.
        /// The numeric expectations are only recorded in comments; no assertion is active,
        /// so this test fails only if one of the calls throws.
        /// </summary>
        public void TestAlignmentScore()
        {
            // Map each symbol of the template to its position and publish the map through
            // the static Profiles.ItemSet before any scoring call is made.
            ISequence template = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary<ISequenceItem, int> symbolIndex = new Dictionary<ISequenceItem, int>();
            for (int position = 0; position < template.Count; position++)
            {
                symbolIndex.Add(template[position], position);
            }
            Profiles.ItemSet = symbolIndex;

            SimilarityMatrix matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpen = -8;
            int gapExtend = -1;

            // Pairwise scoring: { first row, second row }. The trailing number on each entry
            // is the score named in the assertion that is currently disabled for that case.
            string[][] pairwiseCases =
            {
                new[] { "ACG", "ACG" },                               // 15
                new[] { "ACG", "ACC" },                               // 6
                new[] { "AC-", "ACC" },                               // 2
                new[] { "AC--", "ACCG" },                             // 1
                new[] { "A---", "A--C" },                             // -3
                new[] { "GGGA---AAAATCAGATT", "GGGA--CAAAATCAG---" }, // 42
                new[] { "GGG---AAAAATCAGATT", "GGGA--CAAAATCAG---" }, // 33
                new[] { "GGGA---AAAATCAGATT", "GGGAATCAAAATCAG---" }, // 40
                new[] { "GGGA--CAAAATCAG---", "GGGAATCAAAATCAG---" }  // 56
            };

            foreach (string[] pair in pairwiseCases)
            {
                ISequence first = new Sequence(Alphabets.DNA, pair[0]);
                ISequence second = new Sequence(Alphabets.DNA, pair[1]);

                // The result is intentionally unused: nothing checks it at the moment.
                MsaUtils.PairWiseScoreFunction(first, second, matrix, gapOpen, gapExtend);
            }

            // Multiple-alignment scoring, starting with two rows (disabled expectation: 42).
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---")
            };
            float score = MsaUtils.MultipleAlignmentScoreFunction(sequences, matrix, gapOpen, gapExtend);
            Console.WriteLine("alignment score is: {0}", score);
            foreach (ISequence row in sequences)
            {
                Console.WriteLine(row.ToString());
            }

            // Add a third row (disabled expectation: 46).
            sequences.Add(new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---"));
            score = MsaUtils.MultipleAlignmentScoreFunction(sequences, matrix, gapOpen, gapExtend);
            Console.WriteLine("alignment score is: {0}", score);
            foreach (ISequence row in sequences)
            {
                Console.WriteLine(row.ToString());
            }

            // Replace the first row (disabled expectation: 40). The rows are written both
            // before and after the score line, exactly as the existing output does.
            sequences[0] = new Sequence(Alphabets.DNA, "GGG---AAAAATCAGATT");
            score = MsaUtils.MultipleAlignmentScoreFunction(sequences, matrix, gapOpen, gapExtend);
            foreach (ISequence row in sequences)
            {
                Console.WriteLine(row.ToString());
            }
            Console.WriteLine("alignment score is: {0}", score);
            foreach (ISequence row in sequences)
            {
                Console.WriteLine(row.ToString());
            }

            // CalculateOffset on three hand-written pairs; the offsets are printed tab-separated.
            ISequence rowA = new Sequence(Alphabets.DNA, "ABCD");
            ISequence rowB = new Sequence(Alphabets.DNA, "ABCD");
            List<int> offset = MsaUtils.CalculateOffset(rowA, rowB);
            Console.WriteLine("offsets are:");
            foreach (int value in offset)
            {
                Console.Write("{0}\t", value);
            }

            rowA = new Sequence(Alphabets.DNA, "A-BCD");
            rowB = new Sequence(Alphabets.DNA, "AB-CD");
            offset = MsaUtils.CalculateOffset(rowA, rowB);
            Console.WriteLine("\noffsets are:");
            foreach (int value in offset)
            {
                Console.Write("{0}\t", value);
            }

            rowA = new Sequence(Alphabets.DNA, "A-BCD");
            rowB = new Sequence(Alphabets.DNA, "----AB-CD");
            offset = MsaUtils.CalculateOffset(rowA, rowB);
            Console.WriteLine("\noffsets are:");
            foreach (int value in offset)
            {
                Console.Write("{0}\t", value);
            }

            // Q and TC scores of a two-row alignment against a reference list that shares
            // the same first sequence object and an equal-content second one.
            sequences.Clear();
            sequences.Add(rowA);
            sequences.Add(new Sequence(Alphabets.DNA, "ABBCG"));

            List<ISequence> sequencesRef = new List<ISequence>
            {
                rowA,
                new Sequence(Alphabets.DNA, "ABBCG")
            };

            for (int rowIndex = 0; rowIndex < sequences.Count; rowIndex++)
            {
                offset = MsaUtils.CalculateOffset(sequences[rowIndex], sequencesRef[rowIndex]);
                Console.WriteLine("\noffsets are:");
                foreach (int value in offset)
                {
                    Console.Write("{0}\t", value);
                }
            }

            Console.WriteLine("Q score is: {0}", MsaUtils.CalculateAlignmentScoreQ(sequences, sequencesRef));
            Console.WriteLine("TC score is: {0}", MsaUtils.CalculateAlignmentScoreTC(sequences, sequencesRef));

            // Two ten-row example alignments scored with gap penalties -4 / -1.
            string[] firstExample =
            {
                "GGGA---A-AAATCAGATT",
                "GGGAATCA-AAATCAG---",
                "GGGAATCA-AAATCAG---",
                "GGGA---A-A--TC-G---",
                "GGGAATCA-A--TCAG---",
                "GGGAATCTTA--TCAG---",
                "GGGA--CA-AAATCAG---",
                "GGGA---A-AAATCAGATT",
                "GGGAATCA-AAATCAG---",
                "GGGA--CA-AAATCAG---"
            };
            sequences.Clear();
            foreach (string text in firstExample)
            {
                sequences.Add(new Sequence(Alphabets.DNA, text));
            }

            gapOpen = -4;
            gapExtend = -1;

            Console.WriteLine("score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(sequences, matrix, gapOpen, gapExtend));

            string[] secondExample =
            {
                "GGGA---AAAATCAGATT",
                "GGGAATCAAAATCAG---",
                "GGGAATCAAAATCAG---",
                "GGGA-----AATC-G---",
                "GGGAATC--AATCAG---",
                "GGGAATCTTA-TCAG---",
                "GGGA--CAAAATCAG---",
                "GGGA---AAAATCAGATT",
                "GGGAATCAAAATCAG---",
                "GGGA--CAAAATCAG---"
            };
            sequences.Clear();
            foreach (string text in secondExample)
            {
                sequences.Add(new Sequence(Alphabets.DNA, text));
            }

            Console.WriteLine("score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(sequences, matrix, gapOpen, gapExtend));

            // QuickSort with a caller-supplied index array: print the values, then the indices.
            float[] values = { 0, 2, 1, 5, 4 };
            int[] order = { 0, 1, 2, 3, 4 };
            MsaUtils.QuickSort(values, order, 0, values.Length - 1);
            Console.WriteLine("quicksort");
            foreach (float value in values)
            {
                Console.WriteLine(value);
            }
            foreach (int index in order)
            {
                Console.WriteLine(index);
            }

            // QuickSortM hands the index array back through an out parameter.
            Console.WriteLine("quicksortM");
            values = new float[] { 0, 2, 1, 5, 4 };
            int[] orderFromM;
            MsaUtils.QuickSortM(values, out orderFromM, 0, 4);
            foreach (float value in values)
            {
                Console.WriteLine(value);
            }
            for (int slot = 0; slot < values.Length; slot++)
            {
                Console.WriteLine(orderFromM[slot]);
            }

            // QuickSort again, this time with an index array built by CreateIndexArray;
            // only the indices are printed.
            Console.WriteLine("quicksort");
            values = new float[] { 0, 2, 1, 5, 4 };
            int[] orderFromHelper = MsaUtils.CreateIndexArray(values.Length);
            MsaUtils.QuickSort(values, orderFromHelper, 0, values.Length - 1);
            for (int slot = 0; slot < values.Length; slot++)
            {
                Console.WriteLine(orderFromHelper[slot]);
            }

            // Input with one distinct value followed by four equal ones.
            values = new float[] { 1, 0, 0, 0, 0 };
            order = new int[] { 0, 1, 2, 3, 4 };
            MsaUtils.QuickSort(values, order, 0, values.Length - 1);
            foreach (float value in values)
            {
                Console.WriteLine(value);
            }
            foreach (int index in order)
            {
                Console.WriteLine(index);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Runs the PAMSAM aligner on the BOX032Small benchmark file and prints, for each
        /// of the three stages and for the final result, the aligned rows together with
        /// their Q and TC scores against the benchmark alignment.
        /// The only active assertion checks that MsaUtils.UnAlign returns as many
        /// sequences as were parsed.
        /// </summary>
        public void TestMsaBenchMarkLargeDataset()
        {
            // Parse the benchmark alignment and pass it through MsaUtils.UnAlign to get the aligner input.
            string benchmarkPath = @"TestUtils\BOX032Small.xml.afa".TestDir();
            var orgSequences = new FastAParser().Parse(benchmarkPath).ToList();
            var sequences = MsaUtils.UnAlign(orgSequences);

            int expectedCount = orgSequences.Count;
            Assert.AreEqual(expectedCount, sequences.Count);

            // Round-trip the input through a temporary FASTA file (60 symbols per line)
            // and re-parse it; the file is removed whether or not this succeeds.
            string tempFastaPath = Path.GetTempFileName();
            try
            {
                using (StreamWriter fastaWriter = new StreamWriter(tempFastaPath, true))
                {
                    foreach (ISequence sequence in sequences)
                    {
                        fastaWriter.WriteLine(">" + sequence.ID);

                        int lineStart = 0;
                        while (lineStart < sequence.Count)
                        {
                            int lineLength = (int)Math.Min(60, sequence.Count - lineStart);
                            char[] lineChars = sequence.Skip(lineStart).Take(lineLength).Select(a => (char)a).ToArray();
                            fastaWriter.WriteLine(new String(lineChars));
                            lineStart += 60;
                        }

                        fastaWriter.Flush();
                    }
                }

                sequences = new FastAParser().Parse(tempFastaPath).ToList();
            }
            finally
            {
                File.Delete(tempFastaPath);
            }

            Console.WriteLine("Original sequences are:");
            foreach (ISequence item in sequences)
            {
                Console.WriteLine(item);
            }

            Console.WriteLine("Benchmark sequences are:");
            foreach (ISequence item in orgSequences)
            {
                Console.WriteLine(item);
            }

            // Aligner configuration is static, so it is set before the instance is built.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights = false;
            PAMSAMMultipleSequenceAligner.UseStageB = true;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            int gapOpen = -13;
            int gapExtend = -5;
            int kmerLength = 3;
            int degrees = 2;
            int partitions = 16;

            SimilarityMatrix matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

            DistanceFunctionTypes distanceFunction = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes clusteringMethod = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames alignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames scoreFunction = ProfileScoreFunctionNames.WeightedInnerProduct;

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
                sequences,
                kmerLength,
                distanceFunction,
                clusteringMethod,
                alignerName,
                scoreFunction,
                matrix,
                gapOpen,
                gapExtend,
                partitions,
                degrees);

            Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, matrix, gapOpen, gapExtend));

            // Stage 1
            Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
            foreach (ISequence row in msa.AlignedSequencesA)
            {
                Console.WriteLine(row);
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

            // Stage 2
            Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
            foreach (ISequence row in msa.AlignedSequencesB)
            {
                Console.WriteLine(row);
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

            // Stage 3
            Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
            foreach (ISequence row in msa.AlignedSequencesC)
            {
                Console.WriteLine(row);
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

            // Final alignment
            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
            foreach (ISequence row in msa.AlignedSequences)
            {
                Console.WriteLine(row);
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Runs the PAMSAM aligner (faster version, stage B disabled) on the BOX246
        /// running-time dataset and prints the per-stage and final alignments with their
        /// Q and TC scores against the benchmark alignment. Nothing is asserted; the test
        /// fails only if parsing or alignment throws.
        /// </summary>
        public void TestMuscleMultipleSequenceAlignmentRunningTime()
        {
            string filepath = @"TestUtils\FASTA\RunningTime\BOX246.xml.afa";

            // The parser used to be disposed only on the last line of the method, so any
            // exception thrown while parsing or aligning skipped the Dispose call. The
            // using block keeps the parser alive for the whole test and always disposes it.
            using (FastAParser parser = new FastAParser(filepath))
            {
                IList<ISequence> orgSequences = parser.Parse().ToList();

                List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                int numberOfSequences = orgSequences.Count;

                Console.WriteLine("Original sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(new string(sequences[i].Select(a => (char)a).ToArray()));
                }

                Console.WriteLine("Benchmark sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(new string(orgSequences[i].Select(a => (char)a).ToArray()));
                }

                // Aligner configuration is static, so it is set before the instance is built.
                PAMSAMMultipleSequenceAligner.FasterVersion = true;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = false;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 2;

                // Fixed values rather than Environment.ProcessorCount-derived ones.
                int numberOfDegrees    = 2;
                int numberOfPartitions = 16;

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName               = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName       = ProfileScoreFunctionNames.InnerProductFast;

                SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Console.WriteLine("The number of partitions is: {0}", numberOfPartitions);
                Console.WriteLine("The number of degrees is: {0}", numberOfDegrees);
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));

                Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

                // Stage 1
                Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesA[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

                // Stage 2
                Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesB[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

                // Stage 3
                Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesC[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

                // Final alignment
                Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
                for (int i = 0; i < msa.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequences[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Runs the PAMSAM aligner (faster version, stage B disabled) on the BOX246
        /// running-time dataset, treating the input as protein, and prints the per-stage
        /// and final alignments with their Q and TC scores against the benchmark
        /// alignment. Nothing is asserted; the test fails only if a call throws.
        /// </summary>
        public void TestMuscleMultipleSequenceAlignmentRunningTime()
        {
            ISequenceParser fastaParser = new FastaParser();
            string benchmarkPath = @"testdata\FASTA\RunningTime\BOX246.xml.afa";
            MoleculeType mt = MoleculeType.Protein;

            IList<ISequence> orgSequences = fastaParser.Parse(benchmarkPath);
            List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

            // Both listings are bounded by the benchmark count, as in the original loops.
            int benchmarkCount = orgSequences.Count;

            Console.WriteLine("Original sequences are:");
            for (int index = 0; index < benchmarkCount; index++)
            {
                Console.WriteLine(sequences[index].ToString());
            }

            Console.WriteLine("Benchmark sequences are:");
            for (int index = 0; index < benchmarkCount; index++)
            {
                Console.WriteLine(orgSequences[index].ToString());
            }

            // Aligner configuration is static, so it is set before the instance is built.
            PAMSAMMultipleSequenceAligner.FasterVersion = true;
            PAMSAMMultipleSequenceAligner.UseWeights = false;
            PAMSAMMultipleSequenceAligner.UseStageB = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            int gapOpen = -13;
            int gapExtend = -5;
            int kmerLength = 2;
            int degrees = 2;
            int partitions = 16;

            DistanceFunctionTypes distanceFunction = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes clusteringMethod = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames alignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames scoreFunction = ProfileScoreFunctionNames.InnerProductFast;

            // Pick the standard similarity matrix that goes with the molecule type.
            SimilarityMatrix matrix = null;
            if (mt == MoleculeType.DNA)
            {
                matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            }
            else if (mt == MoleculeType.RNA)
            {
                matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            }
            else if (mt == MoleculeType.Protein)
            {
                matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            }
            else
            {
                throw new Exception("Invalid molecular type");
            }

            PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
                sequences,
                mt,
                kmerLength,
                distanceFunction,
                clusteringMethod,
                alignerName,
                scoreFunction,
                matrix,
                gapOpen,
                gapExtend,
                partitions,
                degrees);

            Console.WriteLine("The number of partitions is: {0}", partitions);
            Console.WriteLine("The number of degrees is: {0}", degrees);
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));

            Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, matrix, gapOpen, gapExtend));

            // Stage 1
            Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
            foreach (ISequence row in msa.AlignedSequencesA)
            {
                Console.WriteLine(row.ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

            // Stage 2
            Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
            foreach (ISequence row in msa.AlignedSequencesB)
            {
                Console.WriteLine(row.ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

            // Stage 3
            Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
            foreach (ISequence row in msa.AlignedSequencesC)
            {
                Console.WriteLine(row.ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

            // Final alignment
            Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);
            foreach (ISequence row in msa.AlignedSequences)
            {
                Console.WriteLine(row.ToString());
            }
            Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
            Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
        }
Ejemplo n.º 12
0
        public void TestNeedlemanWunschProfileAligner()
        {
            Console.WriteLine("Number of logical processors: {0}", Environment.ProcessorCount);

            ISequence templateSequence = new Sequence(Alphabets.DNA, "ATGCSWRYKMBVHDN-");
            Dictionary <ISequenceItem, int> itemSet = new Dictionary <ISequenceItem, int>();

            for (int i = 0; i < templateSequence.Count; ++i)
            {
                itemSet.Add(templateSequence[i], i);
            }
            Profiles.ItemSet = itemSet;



            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -3;
            int gapExtendPenalty = -1;

            IProfileAligner profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                                     gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

            ISequence seqA = new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT");
            ISequence seqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG");

            List <ISequence> sequences = new List <ISequence>();

            sequences.Add(seqA);
            sequences.Add(seqB);

            IProfileAlignment profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[0]);
            IProfileAlignment profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[1]);

            profileAligner.Align(profileAlignmentA, profileAlignmentB);


            List <int> eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
            List <int> eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

            List <ISequence> alignedSequences = new List <ISequence>();

            ISequence seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[0]);

            alignedSequences.Add(seq);
            seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[1]);
            alignedSequences.Add(seq);

            float profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

            Console.WriteLine("alignment score is: {0}", profileScore);

            Console.WriteLine("the aligned sequences are:");
            for (int i = 0; i < alignedSequences.Count; ++i)
            {
                Console.WriteLine(alignedSequences[i].ToString());
            }

            ISequence expectedSeqA = new Sequence(Alphabets.DNA, "GGGAA---AAATCAGATT");
            ISequence expectedSeqB = new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---");

            // Test on case 3: 36 sequences
            ISequenceParser   parser       = new FastaParser();
            string            filepath     = @"testdata\FASTA\RV11_BBS_all.afa";
            IList <ISequence> orgSequences = parser.Parse(filepath);

            sequences = MsaUtils.UnAlign(orgSequences);

            int numberOfSequences = orgSequences.Count;

            Console.WriteLine("Original unaligned sequences are:");
            for (int i = 0; i < numberOfSequences; ++i)
            {
                Console.WriteLine(">");
                Console.WriteLine(sequences[i].ToString());
            }

            for (int i = 1; i < numberOfSequences - 1; ++i)
            {
                for (int j = i + 1; j < numberOfSequences; ++j)
                {
                    profileAlignmentA = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                    profileAlignmentB = ProfileAlignment.GenerateProfileAlignment(sequences[j]);

                    profileAligner = new NeedlemanWunschProfileAlignerSerial(similarityMatrix, ProfileScoreFunctionNames.WeightedInnerProduct,
                                                                             gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);
                    profileAligner.Align(profileAlignmentA, profileAlignmentB);

                    eStringSubtree  = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStringSubtreeB = profileAligner.GenerateEString(profileAligner.AlignedB);

                    Console.WriteLine("Sequences lengths are: {0}-{1}", sequences[i].Count, sequences[j].Count);
                    Console.WriteLine("estring 1:");
                    for (int k = 0; k < eStringSubtree.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtree[k]);
                    }
                    Console.WriteLine("\nestring 2:");
                    for (int k = 0; k < eStringSubtreeB.Count; ++k)
                    {
                        Console.Write("{0}\t", eStringSubtreeB[k]);
                    }

                    alignedSequences = new List <ISequence>();

                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtree, sequences[i]);
                    alignedSequences.Add(seq);
                    seq = profileAligner.GenerateSequenceFromEString(eStringSubtreeB, sequences[j]);
                    alignedSequences.Add(seq);

                    profileScore = MsaUtils.MultipleAlignmentScoreFunction(alignedSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty);

                    Console.WriteLine("\nalignment score is: {0}", profileScore);

                    Console.WriteLine("the aligned sequences are:");
                    for (int k = 0; k < alignedSequences.Count; ++k)
                    {
                        Console.WriteLine(alignedSequences[k].ToString());
                    }
                }
            }
        }
Ejemplo n.º 13
0
        // Benchmarks the PAMSAM aligner on the RNA FASTA files found one directory
        // level below TestUtils\Fasta\RNA\k10. Each file is parsed, passed through
        // MsaUtils.UnAlign, aligned again, and scored (Q and TC) against the
        // sequences as originally parsed. Per-file scores plus running and final
        // averages are written to the console; the method makes no assertions.
        public void TestMsaBenchMarkOnBralibase()
        {
            var qScores  = new List<float>();
            var tcScores = new List<float>();

            string benchmarkRoot = @"TestUtils\Fasta\RNA\k10".TestDir();
            var    rootDirectory = new DirectoryInfo(benchmarkRoot);

            // These aligner switches are static, so they are configured once up front.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            var similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);

            int gapOpenPenalty     = -20;
            int gapExtendPenalty   = -5;
            int kmerLength         = 4;
            int numberOfDegrees    = 2;
            int numberOfPartitions = 16;

            var distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
            var hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            var profileAlignerName               = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            var profileProfileFunctionName       = ProfileScoreFunctionNames.WeightedInnerProductCached;

            foreach (DirectoryInfo datasetDirectory in rootDirectory.GetDirectories())
            {
                foreach (FileInfo datasetFile in datasetDirectory.GetFiles())
                {
                    string filePath = datasetFile.FullName;
                    Console.WriteLine($"Loading: {filePath}");

                    // The parser is told explicitly to read the file as ambiguous RNA.
                    var rnaParser = new FastAParser
                    {
                        Alphabet = AmbiguousRnaAlphabet.Instance
                    };
                    var orgSequences = rnaParser.Parse(filePath).ToList();
                    var sequences    = MsaUtils.UnAlign(orgSequences);

                    int numberOfSequences = orgSequences.Count;
                    Console.WriteLine("The number of sequences is: {0}", numberOfSequences);

                    var msa = new PAMSAMMultipleSequenceAligner(
                        sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                        profileAlignerName, profileProfileFunctionName, similarityMatrix,
                        gapOpenPenalty, gapExtendPenalty, numberOfPartitions, numberOfDegrees);

                    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                    // Score the new alignment against the sequences as originally parsed.
                    float scoreQ  = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                    float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                    Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                    Console.WriteLine("Alignment score TC is: {0}", scoreTC);

                    qScores.Add(scoreQ);
                    tcScores.Add(scoreTC);

                    // Print intermediate averages after every 1000 processed files.
                    bool atCheckpoint = qScores.Count % 1000 == 0;
                    if (atCheckpoint)
                    {
                        Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                        Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
                        Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
                    }
                }
            }

            Console.WriteLine("number of datasets is: {0}", qScores.Count);
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(qScores.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(tcScores.ToArray()));
        }
Ejemplo n.º 14
0
        // Benchmarks the PAMSAM aligner (with stage B enabled) on the protein FASTA
        // files located two directory levels below TestUtils\Fasta\Protein\SABmark.
        // Each file is parsed, passed through MsaUtils.UnAlign, aligned with a
        // Blosum62 similarity matrix, and scored (Q and TC) against the sequences as
        // originally parsed. Results are written to the console only; the method
        // makes no assertions.
        public void TestMsaBenchMarkOnSABmark()
        {
            List<float> allQ  = new List<float>();
            List<float> allTC = new List<float>();

            string        fileDirectory = @"TestUtils\Fasta\Protein\SABmark".TestDir();
            DirectoryInfo iD            = new DirectoryInfo(fileDirectory);

            // Static aligner switches shared by every alignment run below.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = true;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            int gapOpenPenalty   = -13;
            int gapExtendPenalty = -5;
            int kmerLength       = 3;

            int numberOfDegrees    = 2;  //Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

            foreach (DirectoryInfo fi in iD.GetDirectories())
            {
                foreach (DirectoryInfo fii in fi.GetDirectories())
                {
                    foreach (FileInfo fiii in fii.GetFiles())
                    {
                        String filePath = fiii.FullName;
                        Console.WriteLine(filePath);
                        FastAParser parser = new FastAParser();

                        IList<ISequence> orgSequences = parser.Parse(filePath).ToList();

                        List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                        int numberOfSequences = orgSequences.Count;

                        Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
                        // This header is kept so the console output is unchanged; the
                        // sequences themselves are not printed.
                        Console.WriteLine("Original unaligned sequences are:");

                        PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                                (sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                                profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                                numberOfPartitions, numberOfDegrees);

                        Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                        // Score the new alignment against the sequences as originally parsed.
                        float scoreQ  = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                        float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                        allQ.Add(scoreQ);
                        allTC.Add(scoreTC);
                        Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                        Console.WriteLine("Alignment score TC is: {0}", scoreTC);

                        // Print intermediate averages after every 1000 processed files.
                        if (allQ.Count % 1000 == 0)
                        {
                            Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
                        }
                    }
                }
            }

            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }
Ejemplo n.º 15
0
        // Benchmarks the PAMSAM aligner on a single protein benchmark file
        // (BOX032.xml.afa). The sequences are passed through MsaUtils.UnAlign,
        // round-tripped through a temporary FASTA file, aligned with a Blosum62
        // matrix, and the alignment produced by each stage is printed together with
        // its Q and TC scores measured against the sequences as originally parsed.
        // Results are written to the console only; the method makes no assertions.
        public void TestMsaBenchMarkLargeDataset()
        {
            // Test on a protein benchmark dataset
            ISequenceParser  parser       = new FastaParser();
            string           filepath     = @"testdata\FASTA\Protein\Balibase\RV913\BOX032.xml.afa";
            IList<ISequence> orgSequences = parser.Parse(filepath);

            IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
            int numberOfSequences      = orgSequences.Count;

            String outputFilePath = @"tempBOX032.xml.afa";

            try
            {
                // Overwrite rather than append: a temp file left behind by an earlier
                // aborted run would otherwise be extended and parsed back with
                // duplicated records. 'using' closes the writer even if a write throws.
                using (StreamWriter writer = new StreamWriter(outputFilePath, false))
                {
                    foreach (ISequence sequence in sequences)
                    {
                        writer.WriteLine(">" + sequence.ID);
                        // write sequence in lines of at most 60 symbols
                        BasicDerivedSequence derivedSeq = new BasicDerivedSequence(sequence, false, false, 0, 0);
                        for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                        {
                            derivedSeq.RangeStart  = lineStart;
                            derivedSeq.RangeLength = Math.Min(60, sequence.Count - lineStart);
                            writer.WriteLine(derivedSeq.ToString());
                        }
                        writer.Flush();
                    }
                }

                // Replace the in-memory sequences with the ones read back from disk.
                sequences.Clear();
                sequences = parser.Parse(outputFilePath);

                Console.WriteLine("Original sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(sequences[i].ToString());
                }

                Console.WriteLine("Benchmark sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(orgSequences[i].ToString());
                }

                PAMSAMMultipleSequenceAligner.FasterVersion = false;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = true;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;
                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 3;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

                SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, MoleculeType.Protein, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

                // Stage 1 result
                Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesA[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

                // Stage 2 result
                Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesB[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

                // Stage 3 result
                Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequencesC[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

                // Final result
                Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                for (int i = 0; i < msa.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(msa.AlignedSequences[i].ToString());
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
            }
            finally
            {
                // Remove the temp file even when the benchmark throws part-way through.
                if (File.Exists(outputFilePath))
                {
                    File.Delete(outputFilePath);
                }
            }
        }
Ejemplo n.º 16
0
        // Benchmarks the PAMSAM aligner on the RNA FASTA files found one directory
        // level below testData\FASTA\RNA\k10. Each file is parsed, passed through
        // MsaUtils.UnAlign, aligned again, and scored (Q and TC) against the
        // sequences as originally parsed. Results are written to the console only;
        // the method makes no assertions.
        public void TestMsaBenchMarkOnBralibase()
        {
            List<float> allQ  = new List<float>();
            List<float> allTC = new List<float>();

            string        fileDirectory = @"testData\FASTA\RNA\k10";
            DirectoryInfo iD            = new DirectoryInfo(fileDirectory);

            // Static aligner switches shared by every alignment run below.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights    = false;
            PAMSAMMultipleSequenceAligner.UseStageB     = false;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            MoleculeType     mt = MoleculeType.RNA;
            SimilarityMatrix similarityMatrix;
            int gapOpenPenalty   = -20;
            int gapExtendPenalty = -5;
            int kmerLength       = 4;

            int numberOfDegrees    = 2;  //Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

            // Pick the similarity matrix that matches the molecule type under test.
            switch (mt)
            {
            case (MoleculeType.DNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
                break;

            case (MoleculeType.RNA):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
                break;

            case (MoleculeType.Protein):
                similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
                break;

            default:
                // A specific exception type; it still derives from Exception.
                throw new InvalidOperationException("Invalid molecular type");
            }

            foreach (DirectoryInfo fi in iD.GetDirectories())
            {
                foreach (FileInfo fiii in fi.GetFiles())
                {
                    String filePath = fiii.FullName;
                    Console.WriteLine(filePath);
                    ISequenceParser parser = new FastaParser();

                    IList<ISequence> orgSequences = parser.Parse(filePath);

                    List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                    int numberOfSequences = orgSequences.Count;

                    Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
                    // This header is kept so the console output is unchanged; the
                    // sequences themselves are not printed.
                    Console.WriteLine("Original unaligned sequences are:");

                    PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                            (sequences, mt, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                            profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                            numberOfPartitions, numberOfDegrees);

                    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                    // Score the new alignment against the sequences as originally parsed.
                    float scoreQ  = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                    float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                    allQ.Add(scoreQ);
                    allTC.Add(scoreTC);
                    Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                    Console.WriteLine("Alignment score TC is: {0}", scoreTC);

                    // Print intermediate averages after every 1000 processed files.
                    if (allQ.Count % 1000 == 0)
                    {
                        Console.WriteLine(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
                        Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
                        Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
                    }
                }
            }
            Console.WriteLine("number of datasets is: {0}", allQ.Count);
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }
Ejemplo n.º 17
0
        // Benchmarks the PAMSAM aligner on a single benchmark file
        // (BOX032Small.xml.afa) using a Blosum62 similarity matrix. The sequences
        // are passed through MsaUtils.UnAlign, round-tripped through a temporary
        // FASTA file, aligned, and the alignment produced by each stage is printed
        // together with its Q and TC scores measured against the sequences as
        // originally parsed. Results are written to the console only; the method
        // makes no assertions.
        public void TestMsaBenchMarkLargeDataset()
        {
            string filepath       = @"\TestUtils\BOX032Small.xml.afa";
            string filePathObj    = Directory.GetCurrentDirectory() + filepath;
            String outputFilePath = @"tempBOX032.xml.afa";

            // Two parsers are used (benchmark file, then temp file). Both are tracked
            // separately so each one is disposed, including on failure.
            FastAParser parser          = new FastAParser(filePathObj);
            FastAParser roundTripParser = null;

            try
            {
                IList<ISequence> orgSequences = parser.Parse().ToList();

                IList<ISequence> sequences = MsaUtils.UnAlign(orgSequences);
                int numberOfSequences      = orgSequences.Count;

                // Overwrite rather than append: a temp file left behind by an earlier
                // aborted run would otherwise be extended and parsed back with
                // duplicated records.
                using (StreamWriter writer = new StreamWriter(outputFilePath, false))
                {
                    foreach (ISequence sequence in sequences)
                    {
                        writer.WriteLine(">" + sequence.ID);
                        // write sequence in lines of at most 60 symbols
                        for (int lineStart = 0; lineStart < sequence.Count; lineStart += 60)
                        {
                            writer.WriteLine(new String(sequence.Skip(lineStart).Take((int)Math.Min(60, sequence.Count - lineStart)).Select(a => (char)a).ToArray()));
                        }
                        writer.Flush();
                    }
                }

                // Replace the in-memory sequences with the ones read back from disk.
                sequences.Clear();
                roundTripParser = new FastAParser(outputFilePath);
                sequences       = roundTripParser.Parse().ToList();

                Console.WriteLine("Original sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(new string(sequences[i].Select(a => (char)a).ToArray()));
                }

                Console.WriteLine("Benchmark sequences are:");
                for (int i = 0; i < numberOfSequences; ++i)
                {
                    Console.WriteLine(new string(orgSequences[i].Select(a => (char)a).ToArray()));
                }

                PAMSAMMultipleSequenceAligner.FasterVersion = false;
                PAMSAMMultipleSequenceAligner.UseWeights    = false;
                PAMSAMMultipleSequenceAligner.UseStageB     = true;
                PAMSAMMultipleSequenceAligner.NumberOfCores = 2;
                int gapOpenPenalty   = -13;
                int gapExtendPenalty = -5;
                int kmerLength       = 3;

                int numberOfDegrees    = 2;  //Environment.ProcessorCount;
                int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

                SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

                DistanceFunctionTypes      distanceFunctionName             = DistanceFunctionTypes.EuclideanDistance;
                UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
                ProfileAlignerNames        profileAlignerName         = ProfileAlignerNames.NeedlemanWunschProfileAligner;
                ProfileScoreFunctionNames  profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProduct;

                PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner
                                                        (sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                                                        numberOfPartitions, numberOfDegrees);

                Console.WriteLine("Benchmark SPS score is: {0}", MsaUtils.MultipleAlignmentScoreFunction(orgSequences, similarityMatrix, gapOpenPenalty, gapExtendPenalty));

                // Stage 1 result
                Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                for (int i = 0; i < msa.AlignedSequencesA.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesA[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesA, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesA, orgSequences));

                // Stage 2 result
                Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                for (int i = 0; i < msa.AlignedSequencesB.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesB[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesB, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesB, orgSequences));

                // Stage 3 result
                Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                for (int i = 0; i < msa.AlignedSequencesC.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequencesC[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequencesC, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequencesC, orgSequences));

                // Final result
                Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                for (int i = 0; i < msa.AlignedSequences.Count; ++i)
                {
                    Console.WriteLine(new string(msa.AlignedSequences[i].Select(a => (char)a).ToArray()));
                }
                Console.WriteLine("Alignment score Q is: {0}", MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences));
                Console.WriteLine("Alignment score TC is: {0}", MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences));
            }
            finally
            {
                // Dispose the parsers before deleting the temp file, then remove the
                // file even when the benchmark throws part-way through.
                if (roundTripParser != null)
                {
                    roundTripParser.Dispose();
                }
                parser.Dispose();

                if (File.Exists(outputFilePath))
                {
                    File.Delete(outputFilePath);
                }
            }
        }
Ejemplo n.º 18
0
        // Exercises ProgressiveAligner in three scenarios:
        //   1. three short DNA sequences, whose aligned output is asserted;
        //   2. ten short DNA sequences, where the guide tree, sequence weights and
        //      alignment are only printed;
        //   3. the sequences parsed from BB11001.tfa with a Blosum62 matrix, where
        //      the guide tree and alignment are only printed.
        public void TestProgressiveAligner()
        {
            MsaUtils.SetProfileItemSets(MoleculeType.DNA);

            var similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            int gapOpenPenalty   = -8;
            int gapExtendPenalty = -1;
            int kmerLength       = 4;

            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = 2
            };

            // ---- Scenario 1: three DNA sequences, result is asserted ----
            List<ISequence> sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
            };

            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);
            BinaryGuideTree         tree                   = new BinaryGuideTree(hierarchicalClustering);

            IProgressiveAligner progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);

            // Expected rows are built as Sequence objects so that both sides of each
            // comparison go through the same ToString() implementation.
            string[] expectedRows =
            {
                new Sequence(Alphabets.DNA, "GGGA---AAAATCAGATT").ToString(),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG---").ToString(),
                new Sequence(Alphabets.DNA, "GGGA--CAAAATCAG---").ToString(),
            };
            for (int row = 0; row < expectedRows.Length; ++row)
            {
                Assert.AreEqual(expectedRows[row], progressiveAligner.AlignedSequences[row].ToString());
            }

            // ---- Scenario 2: ten DNA sequences, output is only printed ----
            sequences = new List<ISequence>
            {
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAATCG"),
                new Sequence(Alphabets.DNA, "GGGAATCAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAATCTTATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGAAAAATCAGATT"),
                new Sequence(Alphabets.DNA, "GGGAATCAAAATCAG"),
                new Sequence(Alphabets.DNA, "GGGACAAAATCAG"),
            };

            // The generator built for scenario 1 is reused here with the new list.
            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);
            hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);
            tree                   = new BinaryGuideTree(hierarchicalClustering);

            for (int node = 0; node < tree.NumberOfNodes; ++node)
            {
                Console.WriteLine("Node {0} ID: {1}", node, tree.Nodes[node].ID);
            }
            for (int edge = 0; edge < tree.NumberOfEdges; ++edge)
            {
                Console.WriteLine("Edge {0} ID: {1}, length: {2}", edge, tree.Edges[edge].ID, tree.Edges[edge].Length);
            }

            SequenceWeighting sw = new SequenceWeighting(tree);
            for (int w = 0; w < sw.Weights.Length; ++w)
            {
                Console.WriteLine("weights {0} is {1}", w, sw.Weights[w]);
            }

            progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            foreach (ISequence aligned in progressiveAligner.AlignedSequences)
            {
                Console.WriteLine(aligned.ToString());
            }

            // ---- Scenario 3: sequences from BB11001.tfa, output is only printed ----
            MsaUtils.SetProfileItemSets(MoleculeType.Protein);
            ISequenceParser  parser       = new FastaParser();
            string           filepath     = @"testdata\FASTA\Protein\BB11001.tfa";
            IList<ISequence> orgSequences = parser.Parse(filepath);

            sequences = MsaUtils.UnAlign(orgSequences);

            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            kmerLength       = 4;
            int numberOfSequences = orgSequences.Count;

            gapOpenPenalty   = -13;
            gapExtendPenalty = -5;

            // NOTE(review): MoleculeType.DNA is passed here although the profile item
            // sets were just switched to Protein and the file sits under a Protein
            // folder. Confirm whether MoleculeType.Protein was intended.
            kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, kmerLength, MoleculeType.DNA);
            kmerDistanceMatrixGenerator.GenerateDistanceMatrix(sequences);

            hierarchicalClustering = new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix);
            tree                   = new BinaryGuideTree(hierarchicalClustering);

            // Iteration starts at NumberOfLeaves; each node visited is printed with
            // the IDs of its two children.
            for (int node = tree.NumberOfLeaves; node < tree.Nodes.Count; ++node)
            {
                Console.WriteLine("Node {0}: leftchildren-{1}, rightChildren-{2}", node, tree.Nodes[node].LeftChildren.ID, tree.Nodes[node].RightChildren.ID);
            }

            progressiveAligner = new ProgressiveAligner(ProfileAlignerNames.NeedlemanWunschProfileAligner, similarityMatrix, gapOpenPenalty, gapExtendPenalty);
            progressiveAligner.Align(sequences, tree);
            foreach (ISequence aligned in progressiveAligner.AlignedSequences)
            {
                Console.WriteLine(aligned.ToString());
            }
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Benchmark-style run of <c>PAMSAMMultipleSequenceAligner</c> over every file
        /// found in the <c>TestUtils\FASTA\Protein\Balibase\RV911\</c> directory.
        /// For each file the sequences are parsed, un-aligned via
        /// <c>MsaUtils.UnAlign</c>, re-aligned, and the resulting alignment is scored
        /// (Q and TC) against the sequences as they were parsed from the file.
        /// The per-file scores and their averages are written to the console.
        /// </summary>
        /// <remarks>
        /// This method makes no assertions; it only reports scores. It mutates the
        /// static configuration properties of <c>PAMSAMMultipleSequenceAligner</c>
        /// (FasterVersion, UseWeights, UseStageB, NumberOfCores).
        /// </remarks>
        public void TestMsaBenchMark()
        {
            string fileDirectory = @"TestUtils\FASTA\Protein\Balibase\RV911\";
            DirectoryInfo iD = new DirectoryInfo(fileDirectory);

            // Static aligner configuration used for every dataset in this run.
            PAMSAMMultipleSequenceAligner.FasterVersion = false;
            PAMSAMMultipleSequenceAligner.UseWeights = false;
            PAMSAMMultipleSequenceAligner.UseStageB = true;
            PAMSAMMultipleSequenceAligner.NumberOfCores = 2;

            int gapOpenPenalty = -20;
            int gapExtendPenalty = -5;
            int kmerLength = 4;

            int numberOfDegrees = 2;     // Environment.ProcessorCount;
            int numberOfPartitions = 16; // Environment.ProcessorCount * 2;

            DistanceFunctionTypes distanceFunctionName = DistanceFunctionTypes.EuclideanDistance;
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName = UpdateDistanceMethodsTypes.Average;
            ProfileAlignerNames profileAlignerName = ProfileAlignerNames.NeedlemanWunschProfileAligner;
            ProfileScoreFunctionNames profileProfileFunctionName = ProfileScoreFunctionNames.WeightedInnerProductCached;

            SimilarityMatrix similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);

            // Scores collected per dataset so the averages can be reported at the end.
            List<float> allQ = new List<float>();
            List<float> allTC = new List<float>();

            foreach (FileInfo fi in iD.GetFiles())
            {
                string filePath = fi.FullName;
                Console.WriteLine(filePath);

                // The using block guarantees the parser is disposed even when parsing,
                // alignment or scoring throws; on the success path it is disposed at
                // the end of the iteration.
                using (FastAParser parser = new FastAParser(filePath))
                {
                    parser.Alphabet = AmbiguousProteinAlphabet.Instance;
                    IList<ISequence> orgSequences = parser.Parse().ToList();

                    List<ISequence> sequences = MsaUtils.UnAlign(orgSequences);

                    int numberOfSequences = orgSequences.Count;

                    // Only the section headers and scores are printed; the per-sequence
                    // listings are intentionally not written.
                    Console.WriteLine("The number of sequences is: {0}", numberOfSequences);
                    Console.WriteLine("Original unaligned sequences are:");
                    Console.WriteLine("Original aligned sequences are:");

                    PAMSAMMultipleSequenceAligner msa = new PAMSAMMultipleSequenceAligner(
                        sequences, kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                        profileAlignerName, profileProfileFunctionName, similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                        numberOfPartitions, numberOfDegrees);

                    Console.WriteLine("Aligned sequences in stage 1: {0}", msa.AlignmentScoreA);
                    Console.WriteLine("Aligned sequences in stage 2: {0}", msa.AlignmentScoreB);
                    Console.WriteLine("Aligned sequences in stage 3: {0}", msa.AlignmentScoreC);
                    Console.WriteLine("Aligned sequences final: {0}", msa.AlignmentScore);

                    // Score the produced alignment against the sequences as parsed from the file.
                    float scoreQ = MsaUtils.CalculateAlignmentScoreQ(msa.AlignedSequences, orgSequences);
                    float scoreTC = MsaUtils.CalculateAlignmentScoreTC(msa.AlignedSequences, orgSequences);
                    allQ.Add(scoreQ);
                    allTC.Add(scoreTC);
                    Console.WriteLine("Alignment score Q is: {0}", scoreQ);
                    Console.WriteLine("Alignment score TC is: {0}", scoreTC);
                }
            }

            Console.WriteLine("Number of datasets is: {0}", allQ.Count);
            Console.WriteLine("average Q score is: {0}", MsaUtils.Mean(allQ.ToArray()));
            Console.WriteLine("average TC score is: {0}", MsaUtils.Mean(allTC.ToArray()));
        }