/// <summary>
/// Validates a PAMSAM (Muscle) multiple sequence alignment while the static
/// properties of PAMSAMMultipleSequenceAligner are temporarily overridden.
/// The previous values of the static properties are restored afterwards,
/// even when an assertion fails.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">Profile aligner name</param>
/// <param name="profileScoreName">Profile score function name.</param>
/// <param name="useweights">use sequence weights true\false</param>
/// <param name="fasterVersion">fasterversion true\false</param>
/// <param name="useStageB">stage2 computation true\false</param>
private void ValidatePamsamAlign(string nodeName, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName,
    bool useweights, bool fasterVersion, bool useStageB)
{
    Initialize(nodeName, expectedScoreNode);

    // Remember the current static settings so they can be put back afterwards.
    bool prevVersion = PAMSAMMultipleSequenceAligner.FasterVersion;
    bool prevUseWeights = PAMSAMMultipleSequenceAligner.UseWeights;
    bool prevUseStageB = PAMSAMMultipleSequenceAligner.UseStageB;

    try
    {
        // Set static properties
        PAMSAMMultipleSequenceAligner.FasterVersion = fasterVersion;
        PAMSAMMultipleSequenceAligner.UseWeights = useweights;
        PAMSAMMultipleSequenceAligner.UseStageB = useStageB;

        // MSA aligned sequences.
        int numberOfDegrees = 2;
        int numberOfPartitions = 2;
        var msa = new PAMSAMMultipleSequenceAligner(lstSequences, kmerLength,
            distanceFunctionName, hierarchicalClusteringMethodName,
            profileAlignerName, profileScoreName, similarityMatrix,
            gapOpenPenalty, gapExtendPenalty, numberOfDegrees, numberOfPartitions);

        // Validate the aligned Sequence and score
        if (fasterVersion)
        {
            // The faster version is compared against the stage 1 expectations.
            InitializeStage1Variables(nodeName);
            Assert.AreEqual(stage1ExpectedSequences.Count, msa.AlignedSequences.Count);

            int index = 0;
            foreach (ISequence seq in msa.AlignedSequences)
            {
                // Expected value first, actual second, so failure messages read correctly.
                Assert.AreEqual(
                    new string(stage1ExpectedSequences[index].Select(a => (char) a).ToArray()),
                    new string(seq.Select(a => (char) a).ToArray()));
                index++;
            }

            Assert.IsTrue(stage1ExpectedScore.Contains(
                msa.AlignmentScore.ToString((IFormatProvider) null)));
        }
        else
        {
            int index = 0;
            foreach (ISequence seq in msa.AlignedSequences)
            {
                // Expected value first, actual second, so failure messages read correctly.
                Assert.AreEqual(
                    new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                    new string(seq.Select(a => (char) a).ToArray()));
                index++;
            }

            Assert.AreEqual(expectedScore, msa.AlignmentScore.ToString((IFormatProvider) null));
        }
    }
    finally
    {
        // Reset it back
        PAMSAMMultipleSequenceAligner.FasterVersion = prevVersion;
        PAMSAMMultipleSequenceAligner.UseWeights = prevUseWeights;
        PAMSAMMultipleSequenceAligner.UseStageB = prevUseStageB;
    }

    // The message previously reused {0} for a "molecule type" this method never
    // receives, which printed fasterVersion twice; each placeholder now maps to
    // exactly one argument.
    ApplicationLog.WriteLine(
        String.Format(null,
            "Validation of pamsam alignment completed successfully with static property fasterversion {0}, usestageb {1} and useweights {2}",
            fasterVersion, useStageB, useweights));
}
/// <summary>
/// Runs a PAMSAM multiple sequence alignment over the supplied sequences
/// using a fixed set of default parameters.
/// </summary>
/// <param name="sequences">Sequences to align.</param>
/// <returns>The aligned sequences produced by the aligner.</returns>
public static IList<ISequence> DoMultipleSequenceAlignment(List<ISequence> sequences)
{
    // Scoring setup: ambiguous DNA similarity matrix plus gap penalties.
    var matrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    const int openPenalty = -4;
    const int extendPenalty = -1;
    const int kmerSize = 3;

    // Build the aligner; the alignment result is read from it right after construction.
    var aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerSize,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.WeightedInnerProduct,
        matrix,
        openPenalty,
        extendPenalty,
        Environment.ProcessorCount * 2,
        Environment.ProcessorCount);

    return aligner.AlignedSequences;
}
/// <summary>
/// Aligns the current input sequences with PAMSAM and returns the stage1 result.
/// </summary>
/// <returns>The aligner's AlignedSequencesA list.</returns>
private List<ISequence> GetStage1AlignedSequence()
{
    var aligner = new PAMSAMMultipleSequenceAligner(
        lstSequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProduct,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    List<ISequence> stage1Aligned = aligner.AlignedSequencesA;
    return stage1Aligned;
}
/// <summary>
/// Aligns the given sequences with PAMSAM using the ambiguous DNA similarity matrix.
/// </summary>
/// <param name="sequences">Sequences to align.</param>
/// <returns>The aligned sequences reported by the aligner.</returns>
private IList<ISequence> GetPAMSAMAlignedSequences(IList<ISequence> sequences)
{
    // The class-level similarity matrix is replaced here and stays set after the call.
    similarityMatrix = new SimilarityMatrix(
        SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);

    var aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProductFast,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    IList<ISequence> aligned = aligner.AlignedSequences;
    return aligned;
}
/// <summary>
/// Checks the stage 2 aligned sequences and stage 2 score of a PAMSAM (Muscle)
/// multiple sequence alignment against the expected values.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">SW/NW profiler</param>
/// <param name="profileScoreName">Profile score function name.</param>
private void ValidatePamsamAlignStage2(string nodeName, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName)
{
    Initialize(nodeName, expectedScoreNode);
    InitializeStage2Variables(nodeName);

    // Renders a sequence as text so two sequences can be compared as strings.
    Func<ISequence, string> toText = s => new string(s.Select(a => (char) a).ToArray());

    var aligner = new PAMSAMMultipleSequenceAligner(
        lstSequences, kmerLength,
        distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName,
        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

    // Stage 2 output is only verified when the aligner actually produced it.
    if (aligner.AlignedSequencesB != null)
    {
        Assert.AreEqual(stage2ExpectedSequences.Count, aligner.AlignedSequencesB.Count);

        int position = 0;
        foreach (ISequence aligned in aligner.AlignedSequencesB)
        {
            string expectedText = toText(stage2ExpectedSequences[position]);
            string actualText = toText(aligned);
            Assert.AreEqual(expectedText, actualText);
            position++;
        }

        string actualScore = aligner.AlignmentScoreB.ToString((IFormatProvider) null);
        Assert.AreEqual(stage2ExpectedScore, actualScore);
    }

    ApplicationLog.WriteLine(String.Format(null,
        "PamsamBvtTest:: Pamsam stage2 alignment completed successfully with all default params"));
}
/// <summary>
/// Checks the stage 3 aligned sequences and stage 3 score of a PAMSAM (Muscle)
/// multiple sequence alignment against the expected values.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">SW/NW profiler</param>
/// <param name="profileScoreName">Profile score function name.</param>
private void ValidatePamsamAlignStage3(string nodeName, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName)
{
    Initialize(nodeName, expectedScoreNode);

    // Renders a sequence as text so it can be searched for inside a string.
    Func<ISequence, string> toText = s => new string(s.Select(a => (char) a).ToArray());

    var aligner = new PAMSAMMultipleSequenceAligner(
        lstSequences, kmerLength,
        distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName,
        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

    // All expected sequences joined into one string, each followed by a comma.
    string expectedSeqString = string.Concat(expectedSequences.Select(s => toText(s) + ","));

    // Every stage 3 sequence must occur somewhere in the expected text.
    foreach (ISequence aligned in aligner.AlignedSequencesC)
    {
        string alignedText = toText(aligned);
        Assert.IsTrue(expectedSeqString.Contains(alignedText));
    }

    string actualScore = aligner.AlignmentScoreC.ToString((IFormatProvider) null);
    Assert.IsTrue(expectedScore.Contains(actualScore));

    ApplicationLog.WriteLine(String.Format(null,
        "PamsamBvtTest:: Pamsam stage3 alignment completed successfully with all default params"));
}
/// <summary>
/// Validates a PAMSAM (Muscle) multiple sequence alignment: the alignment score is
/// always checked, and the aligned sequences are compared one by one with the
/// expected sequences when <paramref name="isWeightedProduct"/> is true.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">SW/NW profiler</param>
/// <param name="profileScoreName">Profile score function name.</param>
/// <param name="isWeightedProduct">True if it is of the WeightedProduct type else false.</param>
private void ValidatePamsamAlign(string nodeName, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName,
    bool isWeightedProduct)
{
    Initialize(nodeName, expectedScoreNode);

    // MSA aligned sequences.
    var msa = new PAMSAMMultipleSequenceAligner(lstSequences, kmerLength,
        distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName,
        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

    // The flag does not change during the loop, so it is tested once up front.
    if (isWeightedProduct)
    {
        int index = 0;
        foreach (ISequence seq in msa.AlignedSequences)
        {
            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(
                new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                new string(seq.Select(a => (char) a).ToArray()));
            index++;
        }
    }

    // The expected score text only has to contain the computed score.
    Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
}
/// <summary>
/// Loads the input sequences, the expected sequences and the expected score
/// from the xml config and stores them in the member variables.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="expectedScoreNode">Expected score node</param>
private void Initialize(string nodeName, string expectedScoreNode)
{
    IAlphabet alphabet = Utility.GetAlphabet(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    // Xml node names of the seven input sequences.
    string[] inputNodes =
    {
        Constants.Sequence1, Constants.Sequence2, Constants.Sequence3,
        Constants.Sequence4, Constants.Sequence5, Constants.Sequence6,
        Constants.Sequence7
    };

    // Xml node names of the nine expected sequences.
    string[] expectedNodes =
    {
        Constants.ExpectedSequenceNode1, Constants.ExpectedSequenceNode2,
        Constants.ExpectedSequenceNode3, Constants.ExpectedSequenceNode4,
        Constants.ExpectedSequenceNode5, Constants.ExpectedSequenceNode6,
        Constants.ExpectedSequenceNode7, Constants.ExpectedSequenceNode8,
        Constants.ExpectedSequenceNode9
    };

    // Read every input sequence string before any sequence object is built.
    string[] inputTexts = new string[inputNodes.Length];
    for (int i = 0; i < inputNodes.Length; i++)
    {
        inputTexts[i] = utilityObj.xmlUtil.GetTextValue(nodeName, inputNodes[i]);
    }

    // Build the input sequence objects, then add them all to the list.
    lstSequences = new List<ISequence>();
    ISequence[] inputSequences = new ISequence[inputTexts.Length];
    for (int i = 0; i < inputTexts.Length; i++)
    {
        inputSequences[i] = new Sequence(alphabet, inputTexts[i]);
    }

    foreach (ISequence input in inputSequences)
    {
        lstSequences.Add(input);
    }

    similarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    profileAligner = new NeedlemanWunschProfileAlignerParallel(
        similarityMatrix, ProfileScoreFunctionNames.InnerProduct,
        gapOpenPenalty, gapExtendPenalty, Environment.ProcessorCount);

    // Read every expected sequence string, then build the expected sequence objects.
    string[] expectedTexts = new string[expectedNodes.Length];
    for (int i = 0; i < expectedNodes.Length; i++)
    {
        expectedTexts[i] = utilityObj.xmlUtil.GetTextValue(nodeName, expectedNodes[i]);
    }

    ISequence[] expectedItems = new ISequence[expectedTexts.Length];
    for (int i = 0; i < expectedTexts.Length; i++)
    {
        expectedItems[i] = new Sequence(alphabet, expectedTexts[i]);
    }

    expectedSequences = new List<ISequence>();
    foreach (ISequence expected in expectedItems)
    {
        expectedSequences.Add(expected);
    }

    expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, expectedScoreNode);

    // Parallel Option will only get set if the PAMSAMMultipleSequenceAligner is getting called
    // To test separately distance matrix, binary tree etc..
    // The instance itself is not needed afterwards, so it is not kept.
    new PAMSAMMultipleSequenceAligner(
        lstSequences, kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProduct,
        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

    ApplicationLog.WriteLine(String.Format(null,
        "Initialization of all variables successfully completed for xml node {0}", nodeName));
}
/// <summary>
/// Aligns the given sequences with PAMSAM, first choosing the similarity matrix
/// that corresponds to the molecule type.
/// </summary>
/// <param name="moleculeType">Molecule Type.</param>
/// <param name="sequences">Sequences to align.</param>
/// <returns>The aligned sequences reported by the aligner.</returns>
private IList<ISequence> GetPAMSAMAlignedSequences(MoleculeType moleculeType,
    IList<ISequence> sequences)
{
    // DNA and RNA use their ambiguous matrices, protein uses Blosum62.
    // For any other molecule type the current similarity matrix is left untouched.
    if (moleculeType == MoleculeType.DNA)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    }
    else if (moleculeType == MoleculeType.RNA)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
    }
    else if (moleculeType == MoleculeType.Protein)
    {
        similarityMatrix = new SimilarityMatrix(
            SimilarityMatrix.StandardSimilarityMatrix.Blosum62);
    }

    var aligner = new PAMSAMMultipleSequenceAligner(
        sequences,
        kmerLength,
        DistanceFunctionTypes.EuclideanDistance,
        UpdateDistanceMethodsTypes.Average,
        ProfileAlignerNames.NeedlemanWunschProfileAligner,
        ProfileScoreFunctionNames.InnerProductFast,
        similarityMatrix,
        gapOpenPenalty,
        gapExtendPenalty,
        2,
        2);

    IList<ISequence> aligned = aligner.AlignedSequences;
    return aligned;
}
/// <summary>
/// Validates a PAMSAM (Muscle) multiple sequence alignment computed with the given
/// gap open and gap extend penalties. The alignment score is always checked; the
/// aligned sequences are compared one by one with the expected sequences when
/// <paramref name="IsAlignedLargeSeq"/> is true.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">molecule type (only used in the log message)</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">SW/NW profiler</param>
/// <param name="profileScoreName">Profile score function name.</param>
/// <param name="gpOpenPenalty">Gap open penalty</param>
/// <param name="gpExtendPenalty">Gap extended penalty</param>
/// <param name="IsAlignedLargeSeq">True for large sequence else false</param>
private void ValidatePamsamAlignWithGapCost(
    string nodeName, MoleculeType moleculeType, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName,
    int gpOpenPenalty, int gpExtendPenalty, bool IsAlignedLargeSeq)
{
    Initialize(nodeName, expectedScoreNode);

    // MSA aligned sequences with specified gap costs.
    var msa = new PAMSAMMultipleSequenceAligner(lstSequences, kmerLength,
        distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName,
        similarityMatrix, gpOpenPenalty, gpExtendPenalty, 2, 2);

    // Validate the aligned Sequence and score.
    // The flag does not change during the loop, so it is tested once up front.
    if (IsAlignedLargeSeq)
    {
        int index = 0;
        foreach (ISequence seq in msa.AlignedSequences)
        {
            // Expected value first, actual second, so failure messages read correctly.
            Assert.AreEqual(
                new string(expectedSequences[index].Select(a => (char) a).ToArray()),
                new string(seq.Select(a => (char) a).ToArray()));
            index++;
        }
    }

    // The expected score text only has to contain the computed score.
    Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));

    ApplicationLog.WriteLine(String.Format(null,
        "PamsamP1Test:: Pamsam alignment completed successfully with equal gap cost for {0} moleculetype with all default params",
        moleculeType.ToString()));
}
/// <summary>
/// Validates a PAMSAM multiple sequence alignment computed with a caller-supplied
/// kmer length. The alignment score is always checked; when
/// <paramref name="IsAlignForMoreSeq"/> is true every aligned sequence must also be
/// contained in the expected sequences.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">molecule type (not used by this method)</param>
/// <param name="expectedScoreNode">Expected score node</param>
/// <param name="hierarchicalClusteringMethodName">hierarchical clustering method name</param>
/// <param name="distanceFunctionName">kmerdistancematrix method name.</param>
/// <param name="profileAlignerName">Profile aligner name</param>
/// <param name="profileScoreName">Profile score function name.</param>
/// <param name="kmrlength">kmer length passed to the aligner</param>
/// <param name="addOnelineSequences">True to call AddOneLineSequences for the node after initialization</param>
/// <param name="IsAlignForMoreSeq">True to check the aligned sequences against the expected sequences</param>
private void ValidatePamsamAlign(
    string nodeName, MoleculeType moleculeType, string expectedScoreNode,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    DistanceFunctionTypes distanceFunctionName,
    ProfileAlignerNames profileAlignerName,
    ProfileScoreFunctionNames profileScoreName,
    int kmrlength, bool addOnelineSequences, bool IsAlignForMoreSeq)
{
    Initialize(nodeName, expectedScoreNode);

    if (addOnelineSequences)
    {
        AddOneLineSequences(nodeName);
    }

    // MSA aligned sequences.
    var msa = new PAMSAMMultipleSequenceAligner(lstSequences, kmrlength,
        distanceFunctionName, hierarchicalClusteringMethodName,
        profileAlignerName, profileScoreName,
        similarityMatrix, gapOpenPenalty, gapExtendPenalty, 2, 2);

    // Validate the aligned Sequence and score.
    // The flag does not change during the loop, so it is tested once up front.
    if (IsAlignForMoreSeq)
    {
        foreach (ISequence seq in msa.AlignedSequences)
        {
            // NOTE(review): Contains relies on the equality semantics of the ISequence
            // implementation; sibling validators compare string forms instead - confirm
            // this is intended.
            Assert.IsTrue(expectedSequences.Contains(seq));
        }
    }

    // The expected score text only has to contain the computed score.
    Assert.IsTrue(expectedScore.Contains(msa.AlignmentScore.ToString((IFormatProvider) null)));
}