Exemple #1
0
        /// <summary>
        /// Generate IProfileAlignment from a set of aligned sequences
        /// </summary>
        /// <param name="sequences">aligned sequences</param>
        public static IProfileAlignment GenerateProfileAlignment(ICollection <ISequence> sequences)
        {
            IProfiles         profileMatrix    = Profiles.GenerateProfiles(sequences);
            IProfileAlignment profileAlignment = new ProfileAlignment();

            profileAlignment.NumberOfSequences = sequences.Count;
            profileAlignment.ProfilesMatrix    = profileMatrix;
            return(profileAlignment);
        }
Exemple #2
0
        /// <summary>
        /// Generate a profileAlignment from one single sequence
        /// The set of sequence items of the seq should be the same as
        /// 'static ItemSet' of the IProfiles.
        /// </summary>
        /// <param name="seq">an input sequence</param>
        /// <param name="weight">sequence weight</param>
        public static IProfileAlignment GenerateProfileAlignment(ISequence seq, float weight)
        {
            IProfiles         profileMatrix    = Profiles.GenerateProfiles(seq, weight);
            IProfileAlignment profileAlignment = new ProfileAlignment();

            profileAlignment.NumberOfSequences = 1;
            profileAlignment.ProfilesMatrix    = profileMatrix;
            return(profileAlignment);
        }
Exemple #3
0
 /// <summary>
 /// Construct a node with assigned ID
 /// </summary>
 /// <param name="id">zero-based node ID</param>
 public BinaryGuideTreeNode(int id)
 {
     LeftChildren     = null;
     RightChildren    = null;
     Parent           = null;
     ProfileAlignment = new ProfileAlignment();
     _id             = id;
     _sequenceID     = id;
     _eString        = new List <int>();
     NeedReAlignment = true;
 }
Exemple #4
0
        /// <summary>
        /// Combine two profileAlignments into one if they are aligned already
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        public static IProfileAlignment GenerateProfileAlignment(IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
        {
            IProfiles profileMatrix = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix, profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences, profileAlignmentB.NumberOfSequences);

            IProfileAlignment profileAlignment = new ProfileAlignment();

            profileAlignment.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;
            profileAlignment.ProfilesMatrix    = profileMatrix;

            return(profileAlignment);
        }
Exemple #5
0
        /// <summary>
        /// The profiles of two subsets is extracted from the current multiple alignment.
        /// Columns containing no residues, i.e. indels only, are discarded.
        ///
        /// This method is used in alignment refinement, when the guide tree is cut into two,
        /// the sequences (leaf nodes) are separated into two subsets. This method generates
        /// two profileAlignments for the two subtrees by extracting profiles of the two subsets
        /// of sequences.
        /// </summary>
        /// <param name="alignedSequences">a set of aligned sequences</param>
        /// <param name="sequenceIndicesA">the subset sequence indices of subtree A</param>
        /// <param name="sequenceIndicesB">the subset sequence indices of subtree B</param>
        /// <param name="allIndelPositions">the list of all-indel positions that have been removed when constructing</param>
        public static IProfileAlignment[] ProfileExtraction(List <ISequence> alignedSequences,
                                                            List <int> sequenceIndicesA, List <int> sequenceIndicesB,
                                                            out List <int>[] allIndelPositions)
        {
            allIndelPositions = new List <int> [2];
            IProfiles         profileA          = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesA, out allIndelPositions[0]);
            IProfiles         profileB          = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesB, out allIndelPositions[1]);
            IProfileAlignment profileAlignmentA = new ProfileAlignment();
            IProfileAlignment profileAlignmentB = new ProfileAlignment();

            profileAlignmentA.ProfilesMatrix    = profileA;
            profileAlignmentB.ProfilesMatrix    = profileB;
            profileAlignmentA.NumberOfSequences = sequenceIndicesA.Count;
            profileAlignmentB.NumberOfSequences = sequenceIndicesB.Count;

            return(new IProfileAlignment[2] {
                profileAlignmentA, profileAlignmentB
            });
        }
Exemple #6
0
        /// <summary>
        /// Combine two profileAlignments with alignment operation array from dynamic programming.
        /// The dynamic programming algorithm returns two arrays containing the alignment operations
        /// on the two profiles. This method applies the operation information in the two arrays to
        /// the two original profiles, and combine them into a new aligned profile, and put into the
        /// newly generated profileAlignment.
        /// </summary>
        /// <param name="profileAlignmentA">first profile alignment</param>
        /// <param name="profileAlignmentB">second profile alignment</param>
        /// <param name="aAligned">aligned interger array generated by dynamic programming</param>
        /// <param name="bAligned">aligned interger array generated by dynamic programming</param>
        /// <param name="gapCode">the gap integer code defined in dynamic programming class</param>
        public static IProfileAlignment GenerateProfileAlignment(
            IProfileAlignment profileAlignmentA,
            IProfileAlignment profileAlignmentB,
            int[] aAligned,
            int[] bAligned,
            int gapCode)
        {
            IProfiles profileMatrix = Profiles.GenerateProfiles(
                profileAlignmentA.ProfilesMatrix, profileAlignmentB.ProfilesMatrix,
                profileAlignmentA.NumberOfSequences, profileAlignmentB.NumberOfSequences,
                aAligned, bAligned, gapCode);

            IProfileAlignment profileAlignment = new ProfileAlignment();

            profileAlignment.NumberOfSequences = profileAlignmentA.NumberOfSequences +
                                                 profileAlignmentB.NumberOfSequences;
            profileAlignment.ProfilesMatrix = profileMatrix;

            return(profileAlignment);
        }
Exemple #7
0
        /// <summary>
        /// Main pregressive alignment algorithm aligns a set of sequences guided by
        /// a binary tree.
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        public void Align(IList <ISequence> sequences, BinaryGuideTree tree)
        {
            SequenceWeighting sequenceWeighting = null;

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                sequenceWeighting = new SequenceWeighting(tree);

                /*
                 * for (int i = 0; i < sequenceWeighting.Weights.Length; ++i)
                 * {
                 *  sequenceWeighting.Weights[i] = 1;
                 * }
                 */
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }
            IAlphabet alphabet = sequences[0].Alphabet;

            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (sequences[i].Alphabet != alphabet)
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight           = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                if (tree.Nodes[i].NeedReAlignment)
                {
                    // pull out its children
                    _nodeA = tree.Nodes[i].LeftChildren;
                    _nodeB = tree.Nodes[i].RightChildren;

                    if (PAMSAMMultipleSequenceAligner.UseWeights)
                    {
                        _profileAligner.Weights    = new float[2];
                        _profileAligner.Weights[0] = _nodeA.Weight;
                        _profileAligner.Weights[1] = _nodeB.Weight;

                        tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
                    }

                    // align two profiles
                    ProfileAlignment result = null;
                    if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    }
                    else
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    }


                    // children node profiles can be deleted
                    _nodeA.ProfileAlignment.Clear();
                    _nodeB.ProfileAlignment.Clear();
                }
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List <ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                throw new Exception("Out of memory", ex.InnerException);
            }

            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node;
                node = tree.Nodes[i];
                while (!node.IsRoot)
                {
                    seq  = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }
Exemple #8
0
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <param name="sequences">input unaligned sequences</param>
        public IList <MBF.Algorithms.Alignment.ISequenceAlignment> Align(IList <ISequence> sequences)
        {
            // Initializations
            if (sequences.Count > 0)
            {
                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(sequences[0].Alphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = sequences[0].Alphabet;
                }
            }

            // Get ProfileAligner ready
            IProfileAligner profileAligner = null;

            switch (_profileAlignerName)
            {
            case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                if (_degreeOfParallelism == 1)
                {
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                else
                {
                    profileAligner = new NeedlemanWunschProfileAlignerParallel(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                break;

            case (ProfileAlignerNames.SmithWatermanProfileAligner):
                if (_degreeOfParallelism == 1)
                {
                    profileAligner = new SmithWatermanProfileAlignerSerial(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                else
                {
                    profileAligner = new SmithWatermanProfileAlignerParallel(
                        SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                }
                break;

            default:
                throw new ArgumentException("Invalid profile aligner name");
            }

            _alignedSequences = new List <ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            Performance.Snapshot("Stage 1");
            // Generate DistanceMatrix
            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, _kmerLength, _moleculeType, _distanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            BinaryGuideTree binaryGuideTree =
                new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreA)
            {
                _alignmentScoreA   = currentScore;
                _alignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (_alignmentScoreA > _alignmentScore)
            {
                _alignmentScore   = _alignmentScoreA;
                _alignedSequences = _alignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                _alignedSequencesB = _alignedSequencesA;
                _alignedSequencesC = _alignedSequencesA;
                _alignmentScoreB   = _alignmentScoreA;
                _alignmentScoreC   = _alignmentScoreA;
            }
            else
            {
                BinaryGuideTree               binaryGuideTreeB              = null;
                IHierarchicalClustering       hierarcicalClusteringB        = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (PAMSAMMultipleSequenceAligner.UseStageB)
                {
                    // STAGE 2
                    Performance.Snapshot("Stage 2");
                    // Generate DistanceMatrix from Multiple Sequence Alignment

                    int iterateTime = 0;

                    while (true)
                    {
                        ++iterateTime;
                        kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

                        // Hierarchical clustering
                        hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                     (kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                        // Generate Guide Tree
                        binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                        BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                        binaryGuideTree = binaryGuideTreeB;

                        // Progressive Alignment
                        IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                        progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreB)
                        {
                            _alignmentScoreB   = currentScore;
                            _alignedSequencesB = progressiveAlignerB.AlignedSequences;
                            break;
                        }
                        else
                        {
                            break;
                        }
                    }
                    if (_alignmentScoreB > _alignmentScore)
                    {
                        _alignmentScore   = _alignmentScoreB;
                        _alignedSequences = _alignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                Performance.Snapshot("Stage 3");
                // refinement
                //int maxRefineMentTime = sequences.Count * 2 - 2;
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;
                _alignedSequencesC = new List <ISequence>(sequences.Count);
                for (int i = 0; i < sequences.Count; ++i)
                {
                    _alignedSequencesC.Add(new Sequence(_alphabet, _alignedSequences[i].ToString()));
                }

                List <int>[]        leafNodeIndices            = null;
                List <int>[]        allIndelPositions          = null;
                IProfileAlignment[] separatedProfileAlignments = null;
                List <int>[]        eStrings = null;

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    Performance.Snapshot("Refinement iter " + refinementTime.ToString());
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        allIndelPositions = new List <int> [2];

                        separatedProfileAlignments = ProfileAlignment.ProfileExtraction(_alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        eStrings = new List <int> [2];

                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                            {
                                Sequence seq      = new Sequence(_alphabet, "");
                                seq.IsReadOnly    = false;
                                int indexAllIndel = 0;
                                for (int j = 0; j < _alignedSequencesC[i].Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seq.Add(_alignedSequencesC[i][j]);
                                    }
                                }

                                seq                   = profileAligner.GenerateSequenceFromEString(eStrings[set], seq);
                                seq.IsReadOnly        = true;
                                _alignedSequencesC[i] = seq;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(_alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreC)
                        {
                            _alignmentScoreC = currentScore;
                            needRefinement   = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                         (kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }
                if (_alignmentScoreC > _alignmentScore)
                {
                    _alignmentScore   = _alignmentScoreC;
                    _alignedSequences = _alignedSequencesC;
                }
                Performance.Snapshot("Stop Stage 3");
            }

            //just for the purpose of integrating PW and MSA with the same output
            IList <MBF.Algorithms.Alignment.ISequenceAlignment> results = new List <MBF.Algorithms.Alignment.ISequenceAlignment>();

            return(results);
        }