/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <param name="inputSequences">Sequences to align; they are copied into a list before use.</param>
/// <returns>
/// A newly created, empty list of sequence alignments. The alignment itself is stored in
/// the aligner's fields (<c>_alignedSequences</c>, <c>_alignmentScore</c> and their
/// per-stage A/B/C counterparts), not in the returned list.
/// </returns>
/// <exception cref="ArgumentException">The configured profile aligner name is not handled.</exception>
public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    List<ISequence> sequences = inputSequences.ToList();

    // Initializations
    if (sequences.Count > 0)
    {
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_alphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _alphabet;
        }
    }

    // Get ProfileAligner ready: algorithm chosen by name, serial vs. parallel by degree of parallelism.
    IProfileAligner profileAligner = null;
    switch (_profileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    _alignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    Performance.Snapshot("Stage 1");

    // Generate DistanceMatrix
    KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(
        kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

    // Generate Guide Tree
    BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

    // Each stage only replaces the stored result when it scores strictly higher.
    if (currentScore > _alignmentScoreA)
    {
        _alignmentScoreA = currentScore;
        _alignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (_alignmentScoreA > _alignmentScore)
    {
        _alignmentScore = _alignmentScoreA;
        _alignedSequences = _alignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Stages 2 and 3 are skipped: their results simply mirror stage 1.
        _alignedSequencesB = _alignedSequencesA;
        _alignedSequencesC = _alignedSequencesA;
        _alignmentScoreB = _alignmentScoreA;
        _alignmentScoreC = _alignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (PAMSAMMultipleSequenceAligner.UseStageB)
        {
            // STAGE 2 (performed exactly once)
            Performance.Snapshot("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

            // Hierarchical clustering
            IHierarchicalClustering hierarchicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);

            // Any result of the comparison is not used here.
            // NOTE(review): presumably this flags the nodes that differ between the trees - confirm.
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

            if (currentScore > _alignmentScoreB)
            {
                _alignmentScoreB = currentScore;
                _alignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (_alignmentScoreB > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreB;
                _alignedSequences = _alignedSequencesB;
            }
        }
        else
        {
            // Without stage 2 the refinement works on the stage 1 tree.
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        Performance.Snapshot("Stage 3");

        // refinement
        //int maxRefineMentTime = sequences.Count * 2 - 2;
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            // No refinement pass is run for exactly two sequences.
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Refinement operates on a copy so that _alignedSequences stays intact
        // unless stage 3 ends up with a better score.
        _alignedSequencesC = new List<ISequence>(sequences.Count);
        for (int i = 0; i < sequences.Count; ++i)
        {
            _alignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), _alignedSequences[i].ToArray())
                {
                    ID = _alignedSequences[i].ID,
                    Metadata = _alignedSequences[i].Metadata
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            Performance.Snapshot("Refinement iter " + refinementTime.ToString());
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cut the tree at this edge and extract one profile per side.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    _alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed first; the e-strings are
                // then mapped back so that index 0/1 still matches leafNodeIndices[0]/[1].
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    List<int> indelPositions = allIndelPositions[set];
                    List<int> eString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                    {
                        // Capture identity data first: the replacement below is built from a
                        // freshly constructed Sequence that was never given the ID or metadata.
                        ISequence current = _alignedSequencesC[i];
                        string originalId = current.ID;
                        var originalMetadata = current.Metadata;

                        // Copy the sequence while skipping the positions reported by ProfileExtraction.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < indelPositions.Count && j == indelPositions[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        _alignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(
                            eString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));

                        // Restore what the replaced element carried (previously these were
                        // self-assignments on the new element, which lost both values).
                        _alignedSequencesC[i].ID = originalId;
                        Sequence realigned = _alignedSequencesC[i] as Sequence;
                        if (realigned != null)
                        {
                            realigned.Metadata = originalMetadata;
                        }
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    _alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > _alignmentScoreC)
                {
                    _alignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                    IHierarchicalClustering refreshedClustering = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(refreshedClustering);
                    break;
                }
            }

            if (!needRefinement)
            {
                // No edge improved the score: stop refining.
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (_alignmentScoreC > _alignmentScore)
        {
            _alignmentScore = _alignmentScoreC;
            _alignedSequences = _alignedSequencesC;
        }

        Performance.Snapshot("Stop Stage 3");
    }

    //just for the purpose of integrating PW and MSA with the same output
    IList<Bio.Algorithms.Alignment.ISequenceAlignment> results =
        new List<Bio.Algorithms.Alignment.ISequenceAlignment>();
    return results;
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// Nothing is returned: the outcome is stored in <c>AlignedSequences</c> /
/// <c>AlignmentScore</c> and their per-stage A, B and C counterparts.
/// </summary>
/// <param name="sequences">Input sequences</param>
/// <exception cref="ArgumentException">The configured profile aligner name is not handled.</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready: algorithm chosen by name, serial vs. parallel by degree of parallelism.
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(
        kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

    // Each stage only replaces the stored result when it scores strictly higher.
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Stages 2 and 3 are skipped: their results simply mirror stage 1.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (UseStageB)
        {
            // STAGE 2 (performed exactly once)
            ReportLog("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

            // Hierarchical clustering
            IHierarchicalClustering hierarchicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);

            // Any result of the comparison is not used here.
            // NOTE(review): presumably this flags the nodes that differ between the trees - confirm.
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

            if (currentScore > this.AlignmentScoreB)
            {
                this.AlignmentScoreB = currentScore;
                this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (this.AlignmentScoreB > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreB;
                this.AlignedSequences = this.AlignedSequencesB;
            }
        }
        else
        {
            // Without stage 2 the refinement works on the stage 1 tree.
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        ReportLog("Stage 3");

        // refinement
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            // No refinement pass is run for exactly two sequences.
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Refinement operates on a copy so that AlignedSequences stays intact
        // unless stage 3 ends up with a better score.
        this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
        foreach (ISequence aligned in this.AlignedSequences)
        {
            // Metadata is deliberately not assigned: do not shallow copy the dictionary.
            this.AlignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), aligned.ToArray())
                {
                    ID = aligned.ID
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            ReportLog("Refinement iter " + refinementTime);
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cut the tree at this edge and extract one profile per side.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed first; the e-strings are
                // then mapped back so that index 0/1 still matches leafNodeIndices[0]/[1].
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    List<int> indelPositions = allIndelPositions[set];
                    List<int> eString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                    {
                        // Capture the ID first: the replacement below is built from a
                        // freshly constructed Sequence that was never given it.
                        ISequence current = this.AlignedSequencesC[i];
                        string originalId = current.ID;

                        // Copy the sequence while skipping the positions reported by ProfileExtraction.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < indelPositions.Count && j == indelPositions[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        this.AlignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(
                            eString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                        // Restore the ID of the replaced element (previously this was a
                        // self-assignment on the new element, which lost the ID).
                        // Metadata is deliberately not carried over: do not shallow copy the dictionary.
                        this.AlignedSequencesC[i].ID = originalId;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > this.AlignmentScoreC)
                {
                    this.AlignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                    IHierarchicalClustering refreshedClustering = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(refreshedClustering);
                    break;
                }
            }

            if (!needRefinement)
            {
                // No edge improved the score: stop refining.
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (this.AlignmentScoreC > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreC;
            this.AlignedSequences = this.AlignedSequencesC;
        }

        ReportLog("Stop Stage 3");
    }
}
/// <summary>
/// Builds the stage 2 binary tree from the stage 1 aligned DNA sequences and cuts it
/// at the given edge to obtain two profiles. A NeedlemanWunschProfileAlignerSerial or
/// NeedlemanWunschProfileAlignerParallel instance is created (depending on the degree of
/// parallelism) with the requested profile function score, Align() is executed, and the
/// row/column sizes of the resulting profile matrix are validated against the xml node.
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="degreeOfParallelism">if 1 it is serial Profiler else parallel profiler</param>
/// <param name="profileFunction">profile function score name</param>
/// <param name="edgeIndex">edge index to cut the tree</param>
private void ValidateProfileAlignerAlignWithProfileFunctionScore(
    string nodeName,
    int degreeOfParallelism,
    ProfileScoreFunctionNames profileFunction,
    int edgeIndex)
{
    Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
    InitializeStage2Variables(Constants.MuscleDnaSequenceNode);

    // Stage 2 tree: stage 1 alignment -> Kimura distance matrix -> clustering -> binary tree.
    List<ISequence> alignedStage1 = GetStage1AlignedSequence();
    IDistanceMatrix kimuraMatrix = GetKimuraDistanceMatrix(alignedStage1);
    IHierarchicalClustering clustering = GetHierarchicalClustering(kimuraMatrix);
    BinaryGuideTree guideTree = GetBinaryTree(clustering);

    // Cutting the tree at the edge yields the leaf indices of both sides.
    List<int>[] leaves = guideTree.SeparateSequencesByCuttingTree(edgeIndex);

    // One profile per side of the cut; the removed positions are not used afterwards.
    List<int>[] removedPositions = null;
    IProfileAlignment[] profiles = ProfileAlignment.ProfileExtraction(
        stage2ExpectedSequences, leaves[0], leaves[1], out removedPositions);

    // Pick the aligner implementation; it stays null when the machine has fewer
    // processors than the requested degree of parallelism.
    IProfileAligner aligner = null;
    if (degreeOfParallelism == 1)
    {
        aligner = new NeedlemanWunschProfileAlignerSerial(
            similarityMatrix, profileFunction, gapOpenPenalty, gapExtendPenalty, 2);
    }
    else if (Environment.ProcessorCount >= degreeOfParallelism)
    {
        aligner = new NeedlemanWunschProfileAlignerParallel(
            similarityMatrix, profileFunction, gapOpenPenalty, gapExtendPenalty, 2);
    }
    else
    {
        ApplicationLog.WriteLine(String.Format(null,
            @"PamsamBvtTest: NeedlemanWunschProfileAlignerParallel could not be instantiated as number of processor is {0} and degree of parallelism {1}",
            Environment.ProcessorCount.ToString((IFormatProvider) null),
            degreeOfParallelism));
    }

    if (aligner == null)
    {
        Assert.Fail("Profile Aligner is not instantiated");
        return;
    }

    // NOTE(review): the first profile is aligned with itself rather than with the second
    // one - confirm this is what the expected sizes in the xml node were generated from.
    IProfileAlignment profileAlignment = aligner.Align(profiles[0], profiles[0]);

    // Validate profile alignment
    string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);
    string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);

    string actualColSize = profileAlignment.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null);
    Assert.IsTrue(expectedColSize.Contains(actualColSize));

    string actualRowSize = profileAlignment.ProfilesMatrix.RowSize.ToString((IFormatProvider) null);
    Assert.IsTrue(expectedRowSize.Contains(actualRowSize));

    ApplicationLog.WriteLine(String.Format(null,
        @"PamsamBvtTest: {0} Align() method validation completed successfully with number of processor is {1} and degree of parallelism {2}",
        profileAligner,
        Environment.ProcessorCount.ToString((IFormatProvider) null),
        degreeOfParallelism));
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// Nothing is returned: the outcome is stored in <c>AlignedSequences</c> /
/// <c>AlignmentScore</c> and their per-stage A, B and C counterparts.
/// </summary>
/// <param name="sequences">Input sequences</param>
/// <exception cref="ArgumentException">The configured profile aligner name is not handled.</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready: algorithm chosen by name, serial vs. parallel by degree of parallelism.
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(
        kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

    // Each stage only replaces the stored result when it scores strictly higher.
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Stages 2 and 3 are skipped: their results simply mirror stage 1.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (UseStageB)
        {
            // STAGE 2 (performed exactly once)
            ReportLog("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

            // Hierarchical clustering
            IHierarchicalClustering hierarchicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);

            // Any result of the comparison is not used here.
            // NOTE(review): presumably this flags the nodes that differ between the trees - confirm.
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

            if (currentScore > this.AlignmentScoreB)
            {
                this.AlignmentScoreB = currentScore;
                this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (this.AlignmentScoreB > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreB;
                this.AlignedSequences = this.AlignedSequencesB;
            }
        }
        else
        {
            // Without stage 2 the refinement works on the stage 1 tree.
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        ReportLog("Stage 3");

        // refinement
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            // No refinement pass is run for exactly two sequences.
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Refinement operates on a copy so that AlignedSequences stays intact
        // unless stage 3 ends up with a better score.
        this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
        foreach (ISequence aligned in this.AlignedSequences)
        {
            // Metadata is deliberately not assigned: do not shallow copy the dictionary.
            this.AlignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), aligned.ToArray())
                {
                    ID = aligned.ID
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            ReportLog("Refinement iter " + refinementTime);
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cut the tree at this edge and extract one profile per side.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed first; the e-strings are
                // then mapped back so that index 0/1 still matches leafNodeIndices[0]/[1].
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    List<int> indelPositions = allIndelPositions[set];
                    List<int> eString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                    {
                        // Capture the ID first: the replacement below is built from a
                        // freshly constructed Sequence that was never given it.
                        ISequence current = this.AlignedSequencesC[i];
                        string originalId = current.ID;

                        // Copy the sequence while skipping the positions reported by ProfileExtraction.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < indelPositions.Count && j == indelPositions[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        this.AlignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(
                            eString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                        // Restore the ID of the replaced element (previously this was a
                        // self-assignment on the new element, which lost the ID).
                        // Metadata is deliberately not carried over: do not shallow copy the dictionary.
                        this.AlignedSequencesC[i].ID = originalId;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > this.AlignmentScoreC)
                {
                    this.AlignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                    IHierarchicalClustering refreshedClustering = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(refreshedClustering);
                    break;
                }
            }

            if (!needRefinement)
            {
                // No edge improved the score: stop refining.
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (this.AlignmentScoreC > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreC;
            this.AlignedSequences = this.AlignedSequencesC;
        }

        ReportLog("Stop Stage 3");
    }
}
/// <summary>
/// Creates binarytree using stage1 sequences and
/// cut the binary tree at the given edge to get two profiles.
/// Create NeedlemanWunschProfileAlignerSerial\Parallel instance
/// according to degree of parallelism
/// and execute AlignSimple\Align() method using two profiles.
/// Validate the IProfileAlignment properties (row and column size of the profile matrix).
/// </summary>
/// <param name="nodeName">xml node name</param>
/// <param name="moleculeType">Molecule Type</param>
/// <param name="degreeOfParallelism">if 1 it is serial Profiler else parallel profiler</param>
/// <param name="edgeIndex">edge index to cut the tree</param>
/// <param name="overloadType">Execute Align()\AlignSimple()</param>
private void ValidateProfileAlignerAlign(string nodeName, MoleculeType moleculeType,
                                         int degreeOfParallelism, int edgeIndex, AlignType overloadType)
{
    // Load the input and expected data matching the molecule type.
    switch (moleculeType)
    {
        case MoleculeType.DNA:
            Initialize(Constants.MuscleDnaSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleDnaSequenceNode);
            break;
        case MoleculeType.RNA:
            Initialize(Constants.MuscleRnaSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleRnaSequenceNode);
            break;
        case MoleculeType.Protein:
            Initialize(Constants.MuscleProteinSequenceNode, Constants.ExpectedScoreNode);
            InitializeStage2Variables(Constants.MuscleProteinSequenceNode);
            break;
    }

    // Get Stage2 Binary Tree
    List<ISequence> stage1AlignedSequences = GetStage1AlignedSequence(moleculeType);
    IDistanceMatrix matrix = GetKimuraDistanceMatrix(stage1AlignedSequences);
    IHierarchicalClustering hierarchicalClustering = GetHierarchicalClustering(matrix);
    BinaryGuideTree binaryTree = GetBinaryTree(hierarchicalClustering);

    // Cut Tree at an edge and get sequences.
    List<int>[] leafNodeIndices = binaryTree.SeparateSequencesByCuttingTree(edgeIndex);

    // Extract profiles
    List<int>[] removedPositions = null;
    IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
        stage2ExpectedSequences, leafNodeIndices[0], leafNodeIndices[1], out removedPositions);

    // Align it
    IProfileAlignment alignedProfile = null;
    if (1 == degreeOfParallelism)
    {
        var nwprofileAligner = new NeedlemanWunschProfileAlignerSerial(
            similarityMatrix, ProfileScoreFunctionNames.InnerProductFast,
            gapOpenPenalty, gapExtendPenalty, 2);

        // An overload type not listed here leaves alignedProfile null,
        // which is reported as a failure below.
        switch (overloadType)
        {
            case AlignType.AlignSimpleAllParams:
                alignedProfile = nwprofileAligner.AlignSimple(
                    similarityMatrix, gapOpenPenalty,
                    separatedProfileAlignments[0], separatedProfileAlignments[1]);
                break;
            case AlignType.AlignSimpleOnlyProfiles:
                alignedProfile = nwprofileAligner.AlignSimple(
                    separatedProfileAlignments[0], separatedProfileAlignments[1]);
                break;
            case AlignType.AlignAllParams:
                alignedProfile = nwprofileAligner.Align(
                    similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                    separatedProfileAlignments[0], separatedProfileAlignments[1]);
                break;
        }
    }
    else
    {
        if (Environment.ProcessorCount >= degreeOfParallelism)
        {
            var nwprofileAlignerParallel = new NeedlemanWunschProfileAlignerParallel(
                similarityMatrix, ProfileScoreFunctionNames.InnerProductFast,
                gapOpenPenalty, gapExtendPenalty, 2);

            switch (overloadType)
            {
                case AlignType.AlignSimpleAllParams:
                    alignedProfile = nwprofileAlignerParallel.AlignSimple(
                        similarityMatrix, gapOpenPenalty,
                        separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    break;
                case AlignType.AlignAllParams:
                    alignedProfile = nwprofileAlignerParallel.Align(
                        similarityMatrix, gapOpenPenalty, gapExtendPenalty,
                        separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    break;
                case AlignType.AlignSimpleOnlyProfiles:
                default:
                    // The profiles-only AlignSimple used to run unconditionally before this
                    // switch and was then overwritten, aligning twice. Keeping it as the
                    // default yields the same profile for every overload type with one run.
                    alignedProfile = nwprofileAlignerParallel.AlignSimple(
                        separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    break;
            }
        }
        else
        {
            ApplicationLog.WriteLine(
                String.Format(null,
                    @"PamsamP1Test: NeedlemanWunschProfileAlignerParallel could not be instantiated as number of processor is {0} and degree of parallelism {1}",
                    Environment.ProcessorCount.ToString((IFormatProvider) null),
                    degreeOfParallelism));
        }
    }

    if (null != alignedProfile)
    {
        // Validate profile alignment
        string expectedRowSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RowSize);
        string expectedColSize = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ColumnSize);

        Assert.AreEqual(expectedColSize,
            alignedProfile.ProfilesMatrix.ColumnSize.ToString((IFormatProvider) null));
        Assert.AreEqual(expectedRowSize,
            alignedProfile.ProfilesMatrix.RowSize.ToString((IFormatProvider) null));

        ApplicationLog.WriteLine(
            String.Format(null,
                @"PamsamP1Test: {0} {1} method validation completed successfully with number of processor is {2} and degree of parallelism {3} for molecule type {4}",
                profileAligner,
                overloadType,
                Environment.ProcessorCount.ToString((IFormatProvider) null),
                degreeOfParallelism,
                moleculeType));
    }
    else
    {
        Assert.Fail("Profile Aligner is not instantiated");
    }
}