Example #1
0
        /// <summary>
        /// Validate input sequences
        /// </summary>
        /// <param name="reads">The Reads</param>
        /// <returns>Valid reads.</returns>
        private IEnumerable <ISequence> ValidateReads(IEnumerable <ISequence> reads)
        {
            IAlphabet      readAlphabet     = Alphabets.GetAmbiguousAlphabet(reads.First().Alphabet);
            HashSet <byte> ambiguousSymbols = readAlphabet.GetAmbiguousSymbols();
            HashSet <byte> gapSymbols;

            readAlphabet.TryGetGapSymbols(out gapSymbols);

            foreach (ISequence read in reads)
            {
                if (read.All(c => !ambiguousSymbols.Contains(c) && !gapSymbols.Contains(c)))
                {
                    yield return(read);
                }
                else
                {
                    continue;
                }
            }
        }
Example #2
0
        /// <summary>
        /// Validate input sequences
        /// </summary>
        /// <param name="reads">The Reads</param>
        /// <returns>Valid reads.</returns>
        private IEnumerable <ISequence> ValidateReads(IEnumerable <ISequence> reads)
        {
            IAlphabet      readAlphabet     = Alphabets.GetAmbiguousAlphabet(reads.First().Alphabet);
            HashSet <byte> ambiguousSymbols = readAlphabet.GetAmbiguousSymbols();
            HashSet <byte> gapSymbols;

            readAlphabet.TryGetGapSymbols(out gapSymbols);

            foreach (ISequence read in reads)
            {
                string originalSequenceId;
                string pairedReadType;
                bool   forward;
                string libraryName;
                if (Bio.Util.Helper.ValidatePairedSequenceId(read.ID, out originalSequenceId, out forward, out pairedReadType, out libraryName))
                {
                    if (!read.Alphabet.HasAmbiguity)
                    {
                        bool gapSymbolFound = false;
                        for (long index = 0; index < read.Count; index++)
                        {
                            if (gapSymbols.Contains(read[index]))
                            {
                                gapSymbolFound = true;
                            }
                        }

                        if (!gapSymbolFound)
                        {
                            // Exclude the otherinfo if any.
                            read.ID = Bio.Util.Helper.GetReadIdExcludingOtherInfo(read.ID);
                            yield return(read);
                        }
                    }
                    else
                    {
                        continue;
                    }
                }
            }
        }
Example #3
0
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <param name="inputSequences"></param>
        /// <returns></returns>
        public IList <Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable <ISequence> inputSequences)
        {
            List <ISequence> sequences = inputSequences.ToList();

            // Initializations
            if (sequences.Count > 0)
            {
                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_alphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _alphabet;
                }
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;

            switch (_profileAlignerName)
            {
            case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                if (_degreeOfParallelism == 1)
                {
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                else
                {
                    profileAligner = new NeedlemanWunschProfileAlignerParallel(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                break;

            case (ProfileAlignerNames.SmithWatermanProfileAligner):
                if (_degreeOfParallelism == 1)
                {
                    profileAligner = new SmithWatermanProfileAlignerSerial(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                else
                {
                    profileAligner = new SmithWatermanProfileAlignerParallel(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                break;

            default:
                throw new ArgumentException("Invalid profile aligner name");
            }

            _alignedSequences = new List <ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            Performance.Snapshot("Stage 1");
            // Generate DistanceMatrix
            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            BinaryGuideTree binaryGuideTree =
                new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreA)
            {
                _alignmentScoreA   = currentScore;
                _alignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (_alignmentScoreA > _alignmentScore)
            {
                _alignmentScore   = _alignmentScoreA;
                _alignedSequences = _alignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                _alignedSequencesB = _alignedSequencesA;
                _alignedSequencesC = _alignedSequencesA;
                _alignmentScoreB   = _alignmentScoreA;
                _alignmentScoreC   = _alignmentScoreA;
            }
            else
            {
                BinaryGuideTree               binaryGuideTreeB              = null;
                IHierarchicalClustering       hierarcicalClusteringB        = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (PAMSAMMultipleSequenceAligner.UseStageB)
                {
                    // STAGE 2
                    Performance.Snapshot("Stage 2");
                    // Generate DistanceMatrix from Multiple Sequence Alignment

                    int iterateTime = 0;

                    while (true)
                    {
                        ++iterateTime;
                        kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

                        // Hierarchical clustering
                        hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                     (kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                        // Generate Guide Tree
                        binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                        BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                        binaryGuideTree = binaryGuideTreeB;

                        // Progressive Alignment
                        IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                        progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreB)
                        {
                            _alignmentScoreB   = currentScore;
                            _alignedSequencesB = progressiveAlignerB.AlignedSequences;
                            break;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (_alignmentScoreB > _alignmentScore)
                    {
                        _alignmentScore   = _alignmentScoreB;
                        _alignedSequences = _alignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                Performance.Snapshot("Stage 3");
                // refinement
                //int maxRefineMentTime = sequences.Count * 2 - 2;
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;
                _alignedSequencesC = new List <ISequence>(sequences.Count);
                for (int i = 0; i < sequences.Count; ++i)
                {
                    _alignedSequencesC.Add(
                        new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet),
                                     _alignedSequences[i].ToArray())
                    {
                        ID       = _alignedSequences[i].ID,
                        Metadata = _alignedSequences[i].Metadata
                    });
                }

                List <int>[]        leafNodeIndices            = null;
                List <int>[]        allIndelPositions          = null;
                IProfileAlignment[] separatedProfileAlignments = null;
                List <int>[]        eStrings = null;

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    Performance.Snapshot("Refinement iter " + refinementTime.ToString());
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        allIndelPositions = new List <int> [2];

                        separatedProfileAlignments = ProfileAlignment.ProfileExtraction(_alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        eStrings = new List <int> [2];

                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                            {
                                //Sequence seq = new Sequence(_alphabet, "");
                                List <byte> seqBytes = new List <byte>();

                                int indexAllIndel = 0;
                                for (int j = 0; j < _alignedSequencesC[i].Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(_alignedSequencesC[i][j]);
                                    }
                                }

                                _alignedSequencesC[i]    = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));
                                _alignedSequencesC[i].ID = _alignedSequencesC[i].ID;
                                (_alignedSequencesC[i] as Sequence).Metadata = _alignedSequencesC[i].Metadata;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(_alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreC)
                        {
                            _alignmentScoreC = currentScore;
                            needRefinement   = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                         (kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }
                if (_alignmentScoreC > _alignmentScore)
                {
                    _alignmentScore   = _alignmentScoreC;
                    _alignedSequences = _alignedSequencesC;
                }
                Performance.Snapshot("Stop Stage 3");
            }

            //just for the purpose of integrating PW and MSA with the same output
            IList <Bio.Algorithms.Alignment.ISequenceAlignment> results = new List <Bio.Algorithms.Alignment.ISequenceAlignment>();

            return(results);
        }
Example #4
0
        /// <summary>
        /// This method assigns the alphabet from the input sequences
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <param name="similarityMatrix">Matrix to use for similarity comparisons</param>
        /// <param name="fixSimilarityMatrixErrors">True to fix any similarity matrix issue related to the alphabet.</param>
        private void SetAlphabet(IList <ISequence> sequences, SimilarityMatrix similarityMatrix, bool fixSimilarityMatrixErrors)
        {
            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // Validate data type
            _alphabet = Alphabets.GetAmbiguousAlphabet(sequences[0].Alphabet);
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, _alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            SimilarityMatrix bestSimilarityMatrix = null;

            if (_alphabet is DnaAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            }
            else if (_alphabet is RnaAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            }
            else if (_alphabet is ProteinAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            }

            // Check or assign the similarity matrix.
            if (similarityMatrix == null)
            {
                SimilarityMatrix = bestSimilarityMatrix;
                if (SimilarityMatrix == null)
                {
                    throw new ArgumentException("Unknown alphabet - could not choose SimilarityMatrix.");
                }
            }
            else
            {
                List <String> similarityMatrixDNA = new List <String> {
                    "AmbiguousDNA"
                };
                List <String> similarityMatrixRNA = new List <String> {
                    "AmbiguousRNA"
                };
                List <String> similarityMatrixProtein = new List <String> {
                    "BLOSUM45", "BLOSUM50", "BLOSUM62", "BLOSUM80", "BLOSUM90", "PAM250", "PAM30", "PAM70"
                };

                if (_alphabet is DnaAlphabet)
                {
                    if (!similarityMatrixDNA.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for DNA.");
                        }
                    }
                }
                else if (_alphabet is ProteinAlphabet)
                {
                    if (!similarityMatrixProtein.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for Protein.");
                        }
                    }
                }
                else if (_alphabet is RnaAlphabet)
                {
                    if (!similarityMatrixRNA.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for RNA.");
                        }
                    }
                }
                else
                {
                    throw new ArgumentException("Invalid alphabet");
                }
            }
        }
Example #5
0
        /// <summary>
        /// Returns an IEnumerable of sequences in the stream being parsed.
        /// </summary>
        /// <param name="reader">Stream to parse.</param>
        /// <param name="buffer">Buffer to use.</param>
        /// <returns>Returns a Sequence.</returns>
        ISequence ParseOne(TextReader reader, byte[] buffer)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            if (reader.Peek() == -1)
            {
                return(null);
            }

            int currentBufferSize = PlatformManager.Services.DefaultBufferSize;

            string message;
            string line = reader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            if (line == null || !line.StartsWith(">", StringComparison.OrdinalIgnoreCase))
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.INVALID_INPUT_FILE,
                    Properties.Resource.FASTA_NAME);

                throw new Exception(message);
            }

            string name           = line.Substring(1);
            int    bufferPosition = 0;

            // Read next line.
            line = reader.ReadLine();

            // Continue reading if blank line found.
            while (line != null && string.IsNullOrEmpty(line))
            {
                line = reader.ReadLine();
            }

            if (line == null)
            {
                message = string.Format(
                    CultureInfo.InvariantCulture,
                    Properties.Resource.InvalidSymbolInString,
                    string.Empty);
                throw new Exception(message);
            }

            IAlphabet alphabet = Alphabet;
            bool      tryAutoDetectAlphabet = alphabet == null;

            do
            {
                // Files > 2G are not supported in this release.
                if ((((long)bufferPosition + line.Length) >= PlatformManager.Services.MaxSequenceSize))
                {
                    throw new ArgumentOutOfRangeException(
                              string.Format(CultureInfo.CurrentUICulture, Properties.Resource.SequenceDataGreaterthan2GB, name));
                }
                int neededSize = bufferPosition + line.Length;
                if (neededSize >= currentBufferSize)
                {
                    //Grow file dynamically, by buffer size, or if too small to fit the new sequence by the size of the sequence
                    int suggestedSize = buffer.Length + PlatformManager.Services.DefaultBufferSize;
                    int newSize       = neededSize < suggestedSize ? suggestedSize : neededSize;
                    Array.Resize(ref buffer, newSize);
                    currentBufferSize = newSize;
                }

                byte[] symbols = Encoding.UTF8.GetBytes(line);

                // Array.Copy -- for performance improvement.
                Array.Copy(symbols, 0, buffer, bufferPosition, symbols.Length);

                // Auto detect alphabet if alphabet is set to null, else validate with already set alphabet
                if (tryAutoDetectAlphabet)
                {
                    // If we have a base alphabet we detected earlier,
                    // then try that first.
                    if (this.baseAlphabet != null &&
                        this.baseAlphabet.ValidateSequence(buffer, bufferPosition, line.Length))
                    {
                        alphabet = this.baseAlphabet;
                    }
                    // Otherwise attempt to identify alphabet
                    else
                    {
                        // Different alphabet - try to auto detect.
                        this.baseAlphabet = null;
                        alphabet          = Alphabets.AutoDetectAlphabet(buffer, bufferPosition, bufferPosition + line.Length, alphabet);
                        if (alphabet == null)
                        {
                            throw new Exception(string.Format(CultureInfo.InvariantCulture,
                                                              Properties.Resource.InvalidSymbolInString, line));
                        }
                    }

                    // Determine the base alphabet used.
                    if (this.baseAlphabet == null)
                    {
                        this.baseAlphabet = alphabet;
                    }
                    else
                    {
                        // If they are not the same, then this might be an error.
                        if (this.baseAlphabet != alphabet)
                        {
                            // If the new alphabet includes all the base alphabet then use it instead.
                            // This happens when we hit an ambiguous form of the alphabet later in the file.
                            if (!this.baseAlphabet.HasAmbiguity && Alphabets.GetAmbiguousAlphabet(this.baseAlphabet) == alphabet)
                            {
                                this.baseAlphabet = alphabet;
                            }
                            else if (alphabet.HasAmbiguity || Alphabets.GetAmbiguousAlphabet(alphabet) != this.baseAlphabet)
                            {
                                throw new Exception(Properties.Resource.FastAContainsMorethanOnebaseAlphabet);
                            }
                        }
                    }
                }
                else
                {
                    // Validate against supplied alphabet.
                    if (!alphabet.ValidateSequence(buffer, bufferPosition, line.Length))
                    {
                        throw new Exception(string.Format(CultureInfo.InvariantCulture, Properties.Resource.InvalidSymbolInString, line));
                    }
                }

                bufferPosition += line.Length;

                if (reader.Peek() == (byte)'>')
                {
                    break;
                }

                // Read next line.
                line = reader.ReadLine();

                // Continue reading if blank line found.
                while (line != null && string.IsNullOrEmpty(line) && reader.Peek() != (byte)'>')
                {
                    line = reader.ReadLine();
                }
            }while (line != null);

            // Truncate buffer to remove trailing 0's
            byte[] tmpBuffer = new byte[bufferPosition];
            Array.Copy(buffer, tmpBuffer, bufferPosition);

            if (tryAutoDetectAlphabet)
            {
                alphabet = this.baseAlphabet;
            }

            // In memory sequence
            return(new Sequence(alphabet, tmpBuffer, false)
            {
                ID = name
            });
        }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <returns>Alignment results</returns>
        private void DoAlignment(IList <ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;

            switch (ProfileAlignerName)
            {
            case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new NeedlemanWunschProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            case (ProfileAlignerNames.SmithWatermanProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new SmithWatermanProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new SmithWatermanProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            default:
                throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List <ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA   = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore   = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB   = this.AlignmentScoreA;
                this.AlignmentScoreC   = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree               binaryGuideTreeB              = null;
                IHierarchicalClustering       hierarcicalClusteringB        = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");
                    // Generate DistanceMatrix from Multiple Sequence Alignment

                    while (true)
                    {
                        kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                        // Hierarchical clustering
                        hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                     (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                        // Generate Guide Tree
                        binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                        BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                        binaryGuideTree = binaryGuideTreeB;

                        // Progressive Alignment
                        IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                        progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreB)
                        {
                            this.AlignmentScoreB   = currentScore;
                            this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                        }
                        break;
                    }
                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore   = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                ReportLog("Stage 3");
                // refinement
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;
                this.AlignedSequencesC = new List <ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                    {
                        ID = t.ID,
                        // Do not shallow copy dictionary
                        //Metadata = t.Metadata
                    });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        List <int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        List <int>[] allIndelPositions = new List <int> [2];

                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        List <int>[]        eStrings = new List <int> [2];

                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                //Sequence seq = new Sequence(_alphabet, "");
                                List <byte> seqBytes = new List <byte>();

                                int indexAllIndel = 0;
                                for (int j = 0; j < this.AlignedSequencesC[i].Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(this.AlignedSequencesC[i][j]);
                                    }
                                }

                                this.AlignedSequencesC[i]    = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));
                                this.AlignedSequencesC[i].ID = this.AlignedSequencesC[i].ID;
                                // Do not shallow copy dictionary
                                //(_alignedSequencesC[i] as Sequence).Metadata = _alignedSequencesC[i].Metadata;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement       = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                         (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }
                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore   = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }