Ejemplo n.º 1
0
        /// <summary>
        ///     Validates the PAMSAM multiple sequence alignment while the static
        ///     properties of PAMSAMMultipleSequenceAligner are temporarily overridden.
        ///     The previous property values are restored even when an assertion fails.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="expectedScoreNode">xml node holding the expected score</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method passed to the aligner</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">profile aligner passed to the aligner</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        /// <param name="useweights">value assigned to the static UseWeights property for this run</param>
        /// <param name="fasterVersion">
        ///     value assigned to the static FasterVersion property; when true the result
        ///     is checked against the stage1 expected data instead of the default expected data
        /// </param>
        /// <param name="useStageB">value assigned to the static UseStageB property for this run</param>
        private void ValidatePamsamAlign(string nodeName,
                                         string expectedScoreNode,
                                         UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
                                         DistanceFunctionTypes distanceFunctionName,
                                         ProfileAlignerNames profileAlignerName,
                                         ProfileScoreFunctionNames profileScoreName,
                                         bool useweights,
                                         bool fasterVersion,
                                         bool useStageB)
        {
            Initialize(nodeName, expectedScoreNode);

            // Remember the current static settings so they can be restored afterwards.
            bool prevVersion = PAMSAMMultipleSequenceAligner.FasterVersion;
            bool prevUseWeights = PAMSAMMultipleSequenceAligner.UseWeights;
            bool prevUseStageB = PAMSAMMultipleSequenceAligner.UseStageB;

            try
            {
                // Set static properties
                PAMSAMMultipleSequenceAligner.FasterVersion = fasterVersion;
                PAMSAMMultipleSequenceAligner.UseWeights = useweights;
                PAMSAMMultipleSequenceAligner.UseStageB = useStageB;

                // MSA aligned sequences.
                int numberOfDegrees = 2;
                int numberOfPartitions = 2;
                var msa =
                    new PAMSAMMultipleSequenceAligner(lstSequences,
                                                      kmerLength, distanceFunctionName, hierarchicalClusteringMethodName,
                                                      profileAlignerName, profileScoreName, similarityMatrix,
                                                      gapOpenPenalty,
                                                      gapExtendPenalty, numberOfDegrees, numberOfPartitions);

                // Validate the aligned Sequence and score
                if (fasterVersion)
                {
                    InitializeStage1Variables(nodeName);
                    Assert.AreEqual(stage1ExpectedSequences.Count, msa.AlignedSequences.Count);
                    int index = 0;
                    foreach (ISequence seq in msa.AlignedSequences)
                    {
                        // Expected value first so that a failure message labels the values correctly.
                        Assert.AreEqual(new string(stage1ExpectedSequences[index].Select(a => (char) a).ToArray()),
                                        new string(seq.Select(a => (char) a).ToArray()));
                        index++;
                    }
                    Assert.IsTrue(stage1ExpectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
                }
                else
                {
                    int index = 0;
                    foreach (ISequence seq in msa.AlignedSequences)
                    {
                        // Expected value first so that a failure message labels the values correctly.
                        Assert.AreEqual(new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                                        new string(seq.Select(a => (char) a).ToArray()));
                        index++;
                    }
                    Assert.AreEqual(expectedScore, msa.AlignmentScore.ToString((IFormatProvider) null));
                }
            }
            finally
            {
                // Reset it back
                PAMSAMMultipleSequenceAligner.FasterVersion = prevVersion;
                PAMSAMMultipleSequenceAligner.UseWeights = prevUseWeights;
                PAMSAMMultipleSequenceAligner.UseStageB = prevUseStageB;
            }

            // Each placeholder now maps to exactly one argument; the previous message
            // reused {0} for a "molecule type" value this method does not have.
            ApplicationLog.WriteLine(
                String.Format(null,
                              "Validation of pamsam alignment completed successfully with static property fasterversion {0}, usestageb {1} and useweights {2}",
                              fasterVersion, useStageB, useweights));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs a PAMSAM multiple sequence alignment over the given sequences
        /// using a fixed set of sample parameters.
        /// </summary>
        /// <param name="sequences">List of sequences to align.</param>
        /// <returns>The AlignedSequences produced by the aligner.</returns>
        public static IList<ISequence> DoMultipleSequenceAlignment(List<ISequence> sequences)
        {
            // Scoring configuration handed to the aligner.
            var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            const int openPenalty = -4;
            const int extendPenalty = -1;
            const int kmerSize = 3;

            // Last two constructor arguments, both derived from the processor count.
            int doubledProcessorCount = Environment.ProcessorCount * 2;
            int processorCount = Environment.ProcessorCount;

            // Build the aligner and hand back its result directly.
            var aligner = new PAMSAMMultipleSequenceAligner(
                sequences,
                kmerSize,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.WeightedInnerProduct,
                matrix,
                openPenalty,
                extendPenalty,
                doubledProcessorCount,
                processorCount);

            return aligner.AlignedSequences;
        }
Ejemplo n.º 3
0
 /// <summary>
 ///     Aligns the member input sequences with PAMSAM and returns the
 ///     aligner's AlignedSequencesA result.
 /// </summary>
 /// <returns>Sequence list</returns>
 private List<ISequence> GetStage1AlignedSequence()
 {
     // Construct the aligner from the member settings and read its stage result in one expression.
     return new PAMSAMMultipleSequenceAligner(
         lstSequences,
         kmerLength,
         DistanceFunctionTypes.EuclideanDistance,
         UpdateDistanceMethodsTypes.Average,
         ProfileAlignerNames.NeedlemanWunschProfileAligner,
         ProfileScoreFunctionNames.InnerProduct,
         similarityMatrix,
         gapOpenPenalty,
         gapExtendPenalty,
         2,
         2).AlignedSequencesA;
 }
Ejemplo n.º 4
0
        /// <summary>
        ///     Aligns the given sequences with PAMSAM using the AmbiguousDna
        ///     similarity matrix, which is also stored in the similarityMatrix member.
        /// </summary>
        /// <param name="sequences">sequences.</param>
        /// <returns>returns aligned sequences</returns>
        private IList<ISequence> GetPAMSAMAlignedSequences(IList<ISequence> sequences)
        {
            // The member is overwritten so later code sees the same matrix used here.
            similarityMatrix =
                new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

            // Construct the aligner and return its result in one expression.
            return new PAMSAMMultipleSequenceAligner(
                sequences,
                kmerLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProductFast,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2).AlignedSequences;
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Checks the stage 2 result (AlignedSequencesB / AlignmentScoreB) of a PAMSAM
        ///     alignment against the stage 2 expected data. Nothing is asserted when
        ///     AlignedSequencesB is null.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">SW/NW profiler</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        private void ValidatePamsamAlignStage2(string nodeName,
                                               string expectedScoreNode,
                                               UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
                                               DistanceFunctionTypes distanceFunctionName,
                                               ProfileAlignerNames profileAlignerName,
                                               ProfileScoreFunctionNames profileScoreName)
        {
            Initialize(nodeName, expectedScoreNode);
            InitializeStage2Variables(nodeName);

            // Run the alignment with the member settings and the supplied strategy choices.
            var aligner = new PAMSAMMultipleSequenceAligner(
                lstSequences,
                kmerLength,
                distanceFunctionName,
                hierarchicalClusteringMethodName,
                profileAlignerName,
                profileScoreName,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            // Stage 2 output is only validated when the aligner produced one.
            var stage2Sequences = aligner.AlignedSequencesB;
            if (stage2Sequences != null)
            {
                Assert.AreEqual(stage2ExpectedSequences.Count, stage2Sequences.Count);

                int position = 0;
                foreach (ISequence aligned in stage2Sequences)
                {
                    string expectedText =
                        new string(stage2ExpectedSequences[position].Select(symbol => (char) symbol).ToArray());
                    string actualText = new string(aligned.Select(symbol => (char) symbol).ToArray());
                    Assert.AreEqual(expectedText, actualText);
                    position++;
                }

                string actualScore = aligner.AlignmentScoreB.ToString((IFormatProvider) null);
                Assert.AreEqual(stage2ExpectedScore, actualScore);
            }

            ApplicationLog.WriteLine(
                "PamsamBvtTest:: Pamsam stage2 alignment completed successfully with all default params");
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Checks the stage 3 result (AlignedSequencesC / AlignmentScoreC) of a PAMSAM
        ///     alignment. Each aligned sequence must occur as text inside the comma-joined
        ///     expected sequences, and the score text must occur inside the expected score.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">SW/NW profiler</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        private void ValidatePamsamAlignStage3(string nodeName,
                                               string expectedScoreNode,
                                               UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
                                               DistanceFunctionTypes distanceFunctionName,
                                               ProfileAlignerNames profileAlignerName,
                                               ProfileScoreFunctionNames profileScoreName)
        {
            Initialize(nodeName, expectedScoreNode);

            // Run the alignment with the member settings and the supplied strategy choices.
            var aligner = new PAMSAMMultipleSequenceAligner(
                lstSequences,
                kmerLength,
                distanceFunctionName,
                hierarchicalClusteringMethodName,
                profileAlignerName,
                profileScoreName,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            // Build "seq1,seq2,...," from the expected sequences (each entry followed by a comma).
            string joinedExpected = string.Empty;
            foreach (ISequence expected in expectedSequences)
            {
                joinedExpected += new string(expected.Select(symbol => (char) symbol).ToArray()) + ",";
            }

            // Every aligned sequence has to appear somewhere in the joined expected text.
            foreach (ISequence aligned in aligner.AlignedSequencesC)
            {
                string alignedText = new string(aligned.Select(symbol => (char) symbol).ToArray());
                Assert.IsTrue(joinedExpected.Contains(alignedText));
            }

            string scoreText = aligner.AlignmentScoreC.ToString((IFormatProvider) null);
            Assert.IsTrue(expectedScore.Contains(scoreText));

            ApplicationLog.WriteLine(
                "PamsamBvtTest:: Pamsam stage3 alignment completed successfully with all default params");
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Validates a PAMSAM multiple sequence alignment against the expected data of
        ///     an xml node. The score is always checked; the aligned sequences are only
        ///     compared when <paramref name="isWeightedProduct" /> is true.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">SW/NW profiler</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        /// <param name="isWeightedProduct">
        ///     When true, every aligned sequence is compared with the expected sequence at
        ///     the same position; when false only the score is validated.
        /// </param>
        private void ValidatePamsamAlign(string nodeName,
                                         string expectedScoreNode,
                                         UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
                                         DistanceFunctionTypes distanceFunctionName,
                                         ProfileAlignerNames profileAlignerName,
                                         ProfileScoreFunctionNames profileScoreName,
                                         bool isWeightedProduct)
        {
            Initialize(nodeName, expectedScoreNode);

            // MSA aligned sequences.
            var msa = new PAMSAMMultipleSequenceAligner(lstSequences,
                                                        kmerLength, distanceFunctionName,
                                                        hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileScoreName,
                                                        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

            // The flag does not change while iterating, so it is tested once up front
            // instead of on every loop iteration.
            if (isWeightedProduct)
            {
                int index = 0;
                foreach (ISequence seq in msa.AlignedSequences)
                {
                    // Expected value first so that a failure message labels the values correctly.
                    Assert.AreEqual(new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                                    new string(seq.Select(a => (char) a).ToArray()));
                    index++;
                }
            }

            // The expected score text only has to contain the computed score.
            Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Reads the alphabet, the seven input sequences, the nine expected sequences
        ///     and the expected score of an xml node and stores them in the member
        ///     variables. Also creates the similarity matrix and profile aligner members.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        private void Initialize(string nodeName, string expectedScoreNode)
        {
            IAlphabet alphabet = Utility.GetAlphabet(
                utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

            // Input sequences: fetch every text value first, then build the sequence objects.
            var inputNodes = new[]
            {
                Constants.Sequence1, Constants.Sequence2, Constants.Sequence3, Constants.Sequence4,
                Constants.Sequence5, Constants.Sequence6, Constants.Sequence7
            };
            string[] inputTexts = inputNodes
                .Select(node => utilityObj.xmlUtil.GetTextValue(nodeName, node))
                .ToArray();

            lstSequences = new List<ISequence>();
            ISequence[] inputSequences = inputTexts
                .Select(text => (ISequence) new Sequence(alphabet, text))
                .ToArray();
            foreach (ISequence input in inputSequences)
            {
                lstSequences.Add(input);
            }

            similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            profileAligner = new NeedlemanWunschProfileAlignerParallel(
                similarityMatrix,
                ProfileScoreFunctionNames.InnerProduct,
                gapOpenPenalty,
                gapExtendPenalty,
                Environment.ProcessorCount);

            // Expected sequences: same two-phase approach, nine nodes this time.
            var expectedNodes = new[]
            {
                Constants.ExpectedSequenceNode1, Constants.ExpectedSequenceNode2, Constants.ExpectedSequenceNode3,
                Constants.ExpectedSequenceNode4, Constants.ExpectedSequenceNode5, Constants.ExpectedSequenceNode6,
                Constants.ExpectedSequenceNode7, Constants.ExpectedSequenceNode8, Constants.ExpectedSequenceNode9
            };
            string[] expectedTexts = expectedNodes
                .Select(node => utilityObj.xmlUtil.GetTextValue(nodeName, node))
                .ToArray();
            ISequence[] expectedItems = expectedTexts
                .Select(text => (ISequence) new Sequence(alphabet, text))
                .ToArray();

            expectedSequences = new List<ISequence>();
            foreach (ISequence expected in expectedItems)
            {
                expectedSequences.Add(expected);
            }

            expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, expectedScoreNode);

            // The constructed aligner is intentionally discarded. Per the original note,
            // the parallel option only gets set when this constructor is called, and that
            // is needed to test the distance matrix, binary tree etc. separately.
            new PAMSAMMultipleSequenceAligner(
                lstSequences,
                kmerLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProduct,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2);

            string completionMessage = String.Format(
                null,
                "Initialization of all variables successfully completed for xml node {0}",
                nodeName);
            ApplicationLog.WriteLine(completionMessage);
        }
Ejemplo n.º 9
0
        /// <summary>
        ///     Aligns the given sequences with PAMSAM after selecting the similarity
        ///     matrix for the molecule type (AmbiguousDna, AmbiguousRna or Blosum62).
        ///     For any other molecule type the similarityMatrix member is left unchanged.
        /// </summary>
        /// <param name="moleculeType">Molecule Type.</param>
        /// <param name="sequences">sequences.</param>
        /// <returns>returns aligned sequences</returns>
        private IList<ISequence> GetPAMSAMAlignedSequences(MoleculeType moleculeType,
                                                           IList<ISequence> sequences)
        {
            // Pick the matrix for the molecule type; unmatched types keep the current member value.
            if (moleculeType == MoleculeType.DNA)
            {
                similarityMatrix =
                    new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            }
            else if (moleculeType == MoleculeType.RNA)
            {
                similarityMatrix =
                    new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            }
            else if (moleculeType == MoleculeType.Protein)
            {
                similarityMatrix =
                    new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
            }

            // Construct the aligner and return its result in one expression.
            return new PAMSAMMultipleSequenceAligner(
                sequences,
                kmerLength,
                DistanceFunctionTypes.EuclideanDistance,
                UpdateDistanceMethodsTypes.Average,
                ProfileAlignerNames.NeedlemanWunschProfileAligner,
                ProfileScoreFunctionNames.InnerProductFast,
                similarityMatrix,
                gapOpenPenalty,
                gapExtendPenalty,
                2,
                2).AlignedSequences;
        }
Ejemplo n.º 10
0
        /// <summary>
        ///     Validates a PAMSAM multiple sequence alignment that is run with the supplied
        ///     gap open and gap extend penalties instead of the member defaults. The score
        ///     is always checked; the aligned sequences are only compared when
        ///     <paramref name="IsAlignedLargeSeq" /> is true.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">molecule type; only used in the completion log message</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">SW/NW profiler</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        /// <param name="gpOpenPenalty">Gap open penalty</param>
        /// <param name="gpExtendPenalty">Gap extended penalty</param>
        /// <param name="IsAlignedLargeSeq">
        ///     When true, every aligned sequence is compared with the expected sequence at
        ///     the same position; when false only the score is validated.
        /// </param>
        private void ValidatePamsamAlignWithGapCost(
            string nodeName, MoleculeType moleculeType, string expectedScoreNode,
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
            DistanceFunctionTypes distanceFunctionName,
            ProfileAlignerNames profileAlignerName,
            ProfileScoreFunctionNames profileScoreName,
            int gpOpenPenalty, int gpExtendPenalty, bool IsAlignedLargeSeq)
        {
            Initialize(nodeName, expectedScoreNode);

            // MSA aligned sequences with specified gap costs.
            var msa = new PAMSAMMultipleSequenceAligner(lstSequences,
                                                        kmerLength, distanceFunctionName,
                                                        hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileScoreName, similarityMatrix,
                                                        gpOpenPenalty,
                                                        gpExtendPenalty, 2, 2);

            // The flag does not change while iterating, so it is tested once up front
            // instead of on every loop iteration.
            if (IsAlignedLargeSeq)
            {
                int index = 0;
                foreach (ISequence seq in msa.AlignedSequences)
                {
                    // Expected value first so that a failure message labels the values correctly.
                    Assert.AreEqual(new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                                    new string(seq.Select(a => (char) a).ToArray()));
                    index++;
                }
            }

            // The expected score text only has to contain the computed score.
            Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
            ApplicationLog.WriteLine(String.Format(null,
                                                   "PamsamP1Test:: Pamsam alignment completed successfully with equal gap cost for {0} moleculetype with all default params",
                                                   moleculeType.ToString()));
        }
Ejemplo n.º 11
0
        /// <summary>
        ///     Validates a PAMSAM multiple sequence alignment that is run with a caller
        ///     supplied kmer length. The score is always checked; membership of the aligned
        ///     sequences in the expected list is only checked when
        ///     <paramref name="IsAlignForMoreSeq" /> is true.
        /// </summary>
        /// <param name="nodeName">xml node name</param>
        /// <param name="moleculeType">molecule type; not used by this method</param>
        /// <param name="expectedScoreNode">Expected score node</param>
        /// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
        /// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
        /// <param name="profileAlignerName">profile aligner passed to the aligner</param>
        /// <param name="profileScoreName">Profile score function name.</param>
        /// <param name="kmrlength">kmer length passed to the aligner instead of the member default</param>
        /// <param name="addOnelineSequences">when true, AddOneLineSequences is called for the node before aligning</param>
        /// <param name="IsAlignForMoreSeq">
        ///     When true, every aligned sequence must be contained in the expected
        ///     sequences; when false only the score is validated.
        /// </param>
        private void ValidatePamsamAlign(
            string nodeName, MoleculeType moleculeType, string expectedScoreNode,
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
            DistanceFunctionTypes distanceFunctionName,
            ProfileAlignerNames profileAlignerName,
            ProfileScoreFunctionNames profileScoreName, int kmrlength,
            bool addOnelineSequences, bool IsAlignForMoreSeq)
        {
            Initialize(nodeName, expectedScoreNode);
            if (addOnelineSequences)
            {
                AddOneLineSequences(nodeName);
            }

            // MSA aligned sequences.
            var msa = new PAMSAMMultipleSequenceAligner(lstSequences,
                                                        kmrlength, distanceFunctionName,
                                                        hierarchicalClusteringMethodName,
                                                        profileAlignerName, profileScoreName, similarityMatrix,
                                                        gapOpenPenalty,
                                                        gapExtendPenalty, 2, 2);

            // The flag does not change while iterating, so it is tested once up front.
            // The former position counter was never read and has been removed.
            if (IsAlignForMoreSeq)
            {
                foreach (ISequence seq in msa.AlignedSequences)
                {
                    Assert.IsTrue(expectedSequences.Contains(seq));
                }
            }

            // The expected score text only has to contain the computed score.
            Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
        }