/// <summary>
/// Filters the input reads, keeping only those that contain neither
/// ambiguous symbols nor gap symbols.
/// </summary>
/// <remarks>
/// The symbol sets are taken from the ambiguous alphabet that corresponds to the
/// alphabet of the first read. Evaluation is deferred: nothing is read from
/// <paramref name="reads"/> until the result is enumerated.
/// </remarks>
/// <param name="reads">The reads to validate.</param>
/// <returns>
/// The reads that passed validation, in their original order. Empty when
/// <paramref name="reads"/> yields no (or a null first) element.
/// </returns>
private IEnumerable<ISequence> ValidateReads(IEnumerable<ISequence> reads)
{
    // An empty input has nothing to validate; avoid the InvalidOperationException
    // that First() would raise in the middle of enumeration.
    ISequence firstRead = reads.FirstOrDefault();
    if (firstRead == null)
    {
        yield break;
    }

    IAlphabet readAlphabet = Alphabets.GetAmbiguousAlphabet(firstRead.Alphabet);
    HashSet<byte> ambiguousSymbols = readAlphabet.GetAmbiguousSymbols();

    // TryGetGapSymbols may fail and leave the out value null; treat that as
    // "this alphabet has no gap symbols" instead of dereferencing null below.
    HashSet<byte> gapSymbols;
    if (!readAlphabet.TryGetGapSymbols(out gapSymbols) || gapSymbols == null)
    {
        gapSymbols = new HashSet<byte>();
    }

    foreach (ISequence read in reads)
    {
        if (read.All(symbol => !ambiguousSymbols.Contains(symbol) && !gapSymbols.Contains(symbol)))
        {
            yield return read;
        }
    }
}
/// <summary>
/// Filters the input reads, keeping only paired reads with a valid ID whose
/// alphabet has no ambiguity and whose data contains no gap symbol.
/// </summary>
/// <remarks>
/// Gap symbols are taken from the ambiguous alphabet that corresponds to the
/// alphabet of the first read. Each read that is returned has its ID rewritten
/// in place by <c>Helper.GetReadIdExcludingOtherInfo</c>. Evaluation is deferred:
/// nothing is read from <paramref name="reads"/> until the result is enumerated.
/// </remarks>
/// <param name="reads">The reads to validate.</param>
/// <returns>
/// The reads that passed validation, in their original order. Empty when
/// <paramref name="reads"/> yields no (or a null first) element.
/// </returns>
private IEnumerable<ISequence> ValidateReads(IEnumerable<ISequence> reads)
{
    // An empty input has nothing to validate; avoid the InvalidOperationException
    // that First() would raise in the middle of enumeration.
    ISequence firstRead = reads.FirstOrDefault();
    if (firstRead == null)
    {
        yield break;
    }

    IAlphabet readAlphabet = Alphabets.GetAmbiguousAlphabet(firstRead.Alphabet);

    // TryGetGapSymbols may fail and leave the out value null; treat that as
    // "this alphabet has no gap symbols" instead of dereferencing null below.
    HashSet<byte> gapSymbols;
    if (!readAlphabet.TryGetGapSymbols(out gapSymbols) || gapSymbols == null)
    {
        gapSymbols = new HashSet<byte>();
    }

    foreach (ISequence read in reads)
    {
        string originalSequenceId;
        string pairedReadType;
        bool forward;
        string libraryName;

        // Only the validity of the ID matters here; the parsed parts are not used.
        if (!Bio.Util.Helper.ValidatePairedSequenceId(
                read.ID, out originalSequenceId, out forward, out pairedReadType, out libraryName))
        {
            continue;
        }

        if (read.Alphabet.HasAmbiguity)
        {
            continue;
        }

        bool gapSymbolFound = false;
        for (long index = 0; index < read.Count; index++)
        {
            if (gapSymbols.Contains(read[index]))
            {
                // One gap is enough to reject the read; no need to scan the rest.
                gapSymbolFound = true;
                break;
            }
        }

        if (gapSymbolFound)
        {
            continue;
        }

        // Exclude the otherinfo if any.
        read.ID = Bio.Util.Helper.GetReadIdExcludingOtherInfo(read.ID);
        yield return read;
    }
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <remarks>
/// Stage 1 builds a k-mer distance matrix, clusters it into a guide tree and runs a
/// progressive alignment. Stage 2 (optional) rebuilds the tree from Kimura distances of
/// the current alignment and aligns again. Stage 3 refines the alignment by cutting the
/// tree at each edge and re-aligning the two resulting profiles. After every stage the
/// best-scoring alignment is kept in <c>_alignedSequences</c> / <c>_alignmentScore</c>.
/// </remarks>
/// <param name="inputSequences">Sequences to align.</param>
/// <returns>
/// Always an empty list; it exists only so that pairwise and multiple aligners share the
/// same output type. The alignment itself is stored in the aligner's fields.
/// </returns>
/// <exception cref="ArgumentException">The configured profile aligner name is not recognised.</exception>
public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    List<ISequence> sequences = inputSequences.ToList();

    // Initializations
    if (sequences.Count > 0)
    {
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_alphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _alphabet;
        }
    }

    // Get ProfileAligner ready
    IProfileAligner profileAligner = null;
    switch (_profileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;

        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;

        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    _alignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    Performance.Snapshot("Stage 1");

    // Generate DistanceMatrix
    KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(
        kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

    // Generate Guide Tree
    BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > _alignmentScoreA)
    {
        _alignmentScoreA = currentScore;
        _alignedSequencesA = progressiveAlignerA.AlignedSequences;
    }
    if (_alignmentScoreA > _alignmentScore)
    {
        _alignmentScore = _alignmentScoreA;
        _alignedSequences = _alignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Faster version stops after stage 1 and reports its result for every stage.
        _alignedSequencesB = _alignedSequencesA;
        _alignedSequencesC = _alignedSequencesA;
        _alignmentScoreB = _alignmentScoreA;
        _alignmentScoreC = _alignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarchicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (PAMSAMMultipleSequenceAligner.UseStageB)
        {
            // STAGE 2
            Performance.Snapshot("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment.
            // Stage 2 is a single pass (the original loop always exited after one iteration).
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

            // Hierarchical clustering
            hierarchicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);

            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreB)
            {
                _alignmentScoreB = currentScore;
                _alignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (_alignmentScoreB > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreB;
                _alignedSequences = _alignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        Performance.Snapshot("Stage 3");

        // refinement
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Work on a copy so that refinement never mutates the best alignment found so far.
        _alignedSequencesC = new List<ISequence>(sequences.Count);
        for (int i = 0; i < sequences.Count; ++i)
        {
            _alignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), _alignedSequences[i].ToArray())
                {
                    ID = _alignedSequences[i].ID,
                    Metadata = _alignedSequences[i].Metadata
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            Performance.Snapshot("Refinement iter " + refinementTime.ToString());

            bool needRefinement = false;
            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    _alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                List<int>[] eStrings = new List<int>[2];

                // The profile with fewer sequences is always passed first; the e-strings are
                // then mapped back so that eStrings[k] belongs to leafNodeIndices[k].
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    int currentSet = set;
                    Parallel.ForEach(leafNodeIndices[currentSet], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                    {
                        // Keep a handle on the sequence being replaced so that its ID and
                        // metadata can be carried over to the refined sequence.
                        ISequence original = _alignedSequencesC[i];

                        // Drop the columns reported as indel positions for this set.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < original.Count; ++j)
                        {
                            if (indexAllIndel < allIndelPositions[currentSet].Count
                                && j == allIndelPositions[currentSet][indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(original[j]);
                            }
                        }

                        ISequence refined = profileAligner.GenerateSequenceFromEString(
                            eStrings[currentSet],
                            new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));

                        // Previously these were self-assignments made after the slot had
                        // already been overwritten, which silently dropped ID and metadata.
                        refined.ID = original.ID;
                        Sequence refinedSequence = refined as Sequence;
                        if (refinedSequence != null)
                        {
                            refinedSequence.Metadata = original.Metadata;
                        }

                        _alignedSequencesC[i] = refined;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    _alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > _alignmentScoreC)
                {
                    _alignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                    hierarchicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                    binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);
                    break;
                }
            }

            if (!needRefinement)
            {
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (_alignmentScoreC > _alignmentScore)
        {
            _alignmentScore = _alignmentScoreC;
            _alignedSequences = _alignedSequencesC;
        }

        Performance.Snapshot("Stop Stage 3");
    }

    // just for the purpose of integrating PW and MSA with the same output
    IList<Bio.Algorithms.Alignment.ISequenceAlignment> results =
        new List<Bio.Algorithms.Alignment.ISequenceAlignment>();
    return results;
}
/// <summary>
/// Assigns the aligner alphabet from the input sequences and checks (or chooses) the
/// similarity matrix to use with it.
/// </summary>
/// <remarks>
/// The alphabet is the ambiguous form of the first sequence's alphabet; every other
/// sequence must come from the same base alphabet. When <paramref name="similarityMatrix"/>
/// is null a default matrix is chosen for the alphabet. When it is supplied, only its name
/// is checked against the names accepted for the alphabet; the
/// <c>SimilarityMatrix</c> property is written only when a default has to be substituted.
/// </remarks>
/// <param name="sequences">Input sequences</param>
/// <param name="similarityMatrix">Matrix to use for similarity comparisons</param>
/// <param name="fixSimilarityMatrixErrors">True to fix any similarity matrix issue related to the alphabet.</param>
/// <exception cref="ArgumentException">
/// The input is empty, the sequences use inconsistent alphabets, the alphabet is not
/// DNA/RNA/protein, or the matrix does not suit the alphabet and fixing is disabled.
/// </exception>
private void SetAlphabet(IList<ISequence> sequences, SimilarityMatrix similarityMatrix, bool fixSimilarityMatrixErrors)
{
    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // Validate data type
    _alphabet = Alphabets.GetAmbiguousAlphabet(sequences[0].Alphabet);

    // Do not throw from inside the parallel body: the caller would receive an
    // AggregateException instead of the ArgumentException used everywhere else here.
    // Stop the loop on the first mismatch and raise the error afterwards.
    ParallelLoopResult alphabetCheck = Parallel.For(
        1,
        sequences.Count,
        PAMSAMMultipleSequenceAligner.parallelOption,
        (i, loopState) =>
        {
            if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, _alphabet))
            {
                loopState.Stop();
            }
        });

    if (!alphabetCheck.IsCompleted)
    {
        throw new ArgumentException("Inconsistent sequence alphabet");
    }

    SimilarityMatrix bestSimilarityMatrix = null;
    if (_alphabet is DnaAlphabet)
    {
        bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
    }
    else if (_alphabet is RnaAlphabet)
    {
        bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
    }
    else if (_alphabet is ProteinAlphabet)
    {
        bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    }

    // Check or assign the similarity matrix.
    if (similarityMatrix == null)
    {
        SimilarityMatrix = bestSimilarityMatrix;
        if (SimilarityMatrix == null)
        {
            throw new ArgumentException("Unknown alphabet - could not choose SimilarityMatrix.");
        }

        return;
    }

    // Names of the matrices accepted for the detected alphabet, plus the label
    // used in the error message.
    List<String> acceptedMatrixNames;
    string alphabetLabel;
    if (_alphabet is DnaAlphabet)
    {
        acceptedMatrixNames = new List<String> { "AmbiguousDNA" };
        alphabetLabel = "DNA";
    }
    else if (_alphabet is ProteinAlphabet)
    {
        acceptedMatrixNames = new List<String>
        {
            "BLOSUM45", "BLOSUM50", "BLOSUM62", "BLOSUM80", "BLOSUM90", "PAM250", "PAM30", "PAM70"
        };
        alphabetLabel = "Protein";
    }
    else if (_alphabet is RnaAlphabet)
    {
        acceptedMatrixNames = new List<String> { "AmbiguousRNA" };
        alphabetLabel = "RNA";
    }
    else
    {
        throw new ArgumentException("Invalid alphabet");
    }

    if (!acceptedMatrixNames.Contains(similarityMatrix.Name))
    {
        if (fixSimilarityMatrixErrors)
        {
            SimilarityMatrix = bestSimilarityMatrix;
        }
        else
        {
            throw new ArgumentException("Inappropriate Similarity Matrix for " + alphabetLabel + ".");
        }
    }
}
/// <summary>
/// Parses a single FASTA record (a '&gt;' header line followed by its sequence lines)
/// from the reader.
/// </summary>
/// <remarks>
/// Blank lines before the header and between lines are skipped. Parsing stops when the
/// next character is '&gt;' or the reader is exhausted. When no alphabet has been set on
/// the parser, the alphabet is detected from the data and remembered in
/// <c>baseAlphabet</c>; otherwise every line is validated against the set alphabet.
/// </remarks>
/// <param name="reader">Stream to parse.</param>
/// <param name="buffer">
/// Scratch buffer used to accumulate the sequence bytes. It is grown locally when too
/// small; the caller's array reference is not replaced.
/// </param>
/// <returns>The parsed sequence, or null when the reader has no more data.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="reader"/> is null, or <paramref name="buffer"/> is null while data remains.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">The sequence reaches the maximum supported size.</exception>
/// <exception cref="Exception">
/// The header is missing, the record has no sequence data, a line contains symbols that are
/// invalid for the alphabet, or the file mixes more than one base alphabet.
/// </exception>
ISequence ParseOne(TextReader reader, byte[] buffer)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if (reader.Peek() == -1)
    {
        return null;
    }

    if (buffer == null)
    {
        throw new ArgumentNullException("buffer");
    }

    string message;
    string line = reader.ReadLine();

    // Continue reading if blank line found.
    while (line != null && string.IsNullOrEmpty(line))
    {
        line = reader.ReadLine();
    }

    if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
    {
        message = string.Format(
            CultureInfo.InvariantCulture,
            Properties.Resource.INVALID_INPUT_FILE,
            Properties.Resource.FASTA_NAME);
        throw new Exception(message);
    }

    string name = line.Substring(1);
    int bufferPosition = 0;

    // Read next line.
    line = reader.ReadLine();

    // Continue reading if blank line found.
    while (line != null && string.IsNullOrEmpty(line))
    {
        line = reader.ReadLine();
    }

    if (line == null)
    {
        message = string.Format(
            CultureInfo.InvariantCulture,
            Properties.Resource.InvalidSymbolInString,
            string.Empty);
        throw new Exception(message);
    }

    IAlphabet alphabet = Alphabet;
    bool tryAutoDetectAlphabet = alphabet == null;

    do
    {
        // Encode first so that every size and offset below is expressed in bytes;
        // the byte count differs from line.Length when the line holds non-ASCII text.
        byte[] symbols = Encoding.UTF8.GetBytes(line);
        int symbolCount = symbols.Length;

        // Files > 2G are not supported in this release.
        if (((long)bufferPosition + symbolCount) >= PlatformManager.Services.MaxSequenceSize)
        {
            throw new ArgumentOutOfRangeException(
                string.Format(CultureInfo.CurrentUICulture, Properties.Resource.SequenceDataGreaterthan2GB, name));
        }

        // Compare against the real capacity of the buffer, not the platform default:
        // the caller may have supplied a buffer of a different size.
        int neededSize = bufferPosition + symbolCount;
        if (neededSize >= buffer.Length)
        {
            // Grow buffer dynamically, by buffer size, or if too small to fit the new
            // sequence by the size of the sequence.
            int suggestedSize = buffer.Length + PlatformManager.Services.DefaultBufferSize;
            int newSize = neededSize < suggestedSize ? suggestedSize : neededSize;
            Array.Resize(ref buffer, newSize);
        }

        // Array.Copy -- for performance improvement.
        Array.Copy(symbols, 0, buffer, bufferPosition, symbolCount);

        // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet
        if (tryAutoDetectAlphabet)
        {
            // If we have a base alphabet we detected earlier,
            // then try that first.
            if (this.baseAlphabet != null
                && this.baseAlphabet.ValidateSequence(buffer, bufferPosition, symbolCount))
            {
                alphabet = this.baseAlphabet;
            }
            else
            {
                // Different alphabet - try to auto detect.
                this.baseAlphabet = null;
                alphabet = Alphabets.AutoDetectAlphabet(buffer, bufferPosition, bufferPosition + symbolCount, alphabet);
                if (alphabet == null)
                {
                    throw new Exception(string.Format(
                        CultureInfo.InvariantCulture,
                        Properties.Resource.InvalidSymbolInString,
                        line));
                }
            }

            // Determine the base alphabet used.
            if (this.baseAlphabet == null)
            {
                this.baseAlphabet = alphabet;
            }
            else if (this.baseAlphabet != alphabet)
            {
                // If they are not the same, then this might be an error.
                // If the new alphabet includes all the base alphabet then use it instead.
                // This happens when we hit an ambiguous form of the alphabet later in the file.
                if (!this.baseAlphabet.HasAmbiguity
                    && Alphabets.GetAmbiguousAlphabet(this.baseAlphabet) == alphabet)
                {
                    this.baseAlphabet = alphabet;
                }
                else if (alphabet.HasAmbiguity
                    || Alphabets.GetAmbiguousAlphabet(alphabet) != this.baseAlphabet)
                {
                    throw new Exception(Properties.Resource.FastAContainsMorethanOnebaseAlphabet);
                }
            }
        }
        else
        {
            // Validate against supplied alphabet.
            if (!alphabet.ValidateSequence(buffer, bufferPosition, symbolCount))
            {
                throw new Exception(string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.InvalidSymbolInString,
                    line));
            }
        }

        bufferPosition += symbolCount;

        // The next record starts here; leave its header for the next call.
        if (reader.Peek() == (byte)'>')
        {
            break;
        }

        // Read next line.
        line = reader.ReadLine();

        // Continue reading if blank line found.
        while (line != null && string.IsNullOrEmpty(line) && reader.Peek() != (byte)'>')
        {
            line = reader.ReadLine();
        }
    }
    while (line != null);

    // Truncate buffer to remove trailing 0's
    byte[] tmpBuffer = new byte[bufferPosition];
    Array.Copy(buffer, tmpBuffer, bufferPosition);

    if (tryAutoDetectAlphabet)
    {
        alphabet = this.baseAlphabet;
    }

    // In memory sequence
    return new Sequence(alphabet, tmpBuffer, false) { ID = name };
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <remarks>
/// Stage 1 builds a k-mer distance matrix, clusters it into a guide tree and runs a
/// progressive alignment. Stage 2 (optional) rebuilds the tree from Kimura distances of
/// the current alignment and aligns again. Stage 3 refines the alignment by cutting the
/// tree at each edge and re-aligning the two resulting profiles. Nothing is returned:
/// after every stage the best-scoring alignment is kept in <c>AlignedSequences</c> /
/// <c>AlignmentScore</c>, and the per-stage results in the A/B/C counterparts.
/// </remarks>
/// <param name="sequences">Input sequences</param>
/// <exception cref="ArgumentException">The configured profile aligner name is not recognised.</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;

        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;

        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarchicalClustering = new HierarchicalClusteringParallel(
        kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarchicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }
    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Faster version stops after stage 1 and reports its result for every stage.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
        return;
    }

    BinaryGuideTree binaryGuideTreeB = null;
    IHierarchicalClustering hierarchicalClusteringB = null;
    KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

    if (UseStageB)
    {
        // STAGE 2
        ReportLog("Stage 2");

        // Generate DistanceMatrix from Multiple Sequence Alignment.
        // Stage 2 is a single pass (the original loop always exited after one iteration).
        kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

        // Hierarchical clustering
        hierarchicalClusteringB = new HierarchicalClusteringParallel(
            kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

        // Generate Guide Tree
        binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);

        BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
        binaryGuideTree = binaryGuideTreeB;

        // Progressive Alignment
        IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
        progressiveAlignerB.Align(sequences, binaryGuideTreeB);

        currentScore = MsaUtils.MultipleAlignmentScoreFunction(
            progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
        if (currentScore > this.AlignmentScoreB)
        {
            this.AlignmentScoreB = currentScore;
            this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
        }

        if (this.AlignmentScoreB > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreB;
            this.AlignedSequences = this.AlignedSequencesB;
        }
    }
    else
    {
        binaryGuideTreeB = binaryGuideTree;
    }

    // STAGE 3
    ReportLog("Stage 3");

    // refinement
    int maxRefineMentTime = 1;
    if (sequences.Count == 2)
    {
        maxRefineMentTime = 0;
    }

    int refinementTime = 0;

    // Work on a copy so that refinement never mutates the best alignment found so far.
    this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
    foreach (ISequence t in this.AlignedSequences)
    {
        // Do not shallow copy the metadata dictionary; only the ID is carried over.
        this.AlignedSequencesC.Add(
            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
            {
                ID = t.ID
            });
    }

    while (refinementTime < maxRefineMentTime)
    {
        ++refinementTime;
        ReportLog("Refinement iter " + refinementTime);

        bool needRefinement = false;
        for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
        {
            List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

            List<int>[] allIndelPositions;
            IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

            List<int>[] eStrings = new List<int>[2];

            // The profile with fewer sequences is always passed first; the e-strings are
            // then mapped back so that eStrings[k] belongs to leafNodeIndices[k].
            if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
            {
                profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
            }
            else
            {
                profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
            }

            for (int set = 0; set < 2; ++set)
            {
                int currentSet = set;
                Parallel.ForEach(leafNodeIndices[currentSet], ParallelOption, i =>
                {
                    // Keep a handle on the sequence being replaced so that its ID can be
                    // carried over to the refined sequence.
                    ISequence original = this.AlignedSequencesC[i];

                    // Drop the columns reported as indel positions for this set.
                    List<byte> seqBytes = new List<byte>();
                    int indexAllIndel = 0;
                    for (int j = 0; j < original.Count; ++j)
                    {
                        if (indexAllIndel < allIndelPositions[currentSet].Count
                            && j == allIndelPositions[currentSet][indexAllIndel])
                        {
                            ++indexAllIndel;
                        }
                        else
                        {
                            seqBytes.Add(original[j]);
                        }
                    }

                    ISequence refined = profileAligner.GenerateSequenceFromEString(
                        eStrings[currentSet],
                        new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                    // Previously this was a self-assignment made after the slot had already
                    // been overwritten, which silently dropped the ID.
                    // Metadata is intentionally not copied (no shallow copy of the dictionary).
                    refined.ID = original.ID;

                    this.AlignedSequencesC[i] = refined;
                });
            }

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

            if (currentScore > this.AlignmentScoreC)
            {
                this.AlignmentScoreC = currentScore;
                needRefinement = true;

                // recreate the tree
                kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                hierarchicalClusteringB = new HierarchicalClusteringParallel(
                    kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                binaryGuideTreeB = new BinaryGuideTree(hierarchicalClusteringB);
                break;
            }
        }

        if (!needRefinement)
        {
            refinementTime = maxRefineMentTime;
            break;
        }
    }

    if (this.AlignmentScoreC > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreC;
        this.AlignedSequences = this.AlignedSequencesC;
    }

    ReportLog("Stop Stage 3");
}