/// <summary>
/// Builds a profile alignment from a collection of already-aligned sequences.
/// </summary>
/// <param name="sequences">aligned sequences</param>
/// <param name="weights">sequence weights, forwarded to the profile generator</param>
/// <returns>a new profile alignment carrying the generated profiles and the sequence count</returns>
public static IProfileAlignment GenerateProfileAlignment(ICollection<ISequence> sequences, float[] weights)
{
    IProfiles generatedProfiles = Profiles.GenerateProfiles(sequences, weights);

    IProfileAlignment alignment = new ProfileAlignment();
    alignment.ProfilesMatrix = generatedProfiles;
    alignment.NumberOfSequences = sequences.Count;

    return alignment;
}
/// <summary>
/// Builds a profile alignment that represents exactly one sequence.
/// The set of sequence items of the sequence should be the same as the
/// 'static ItemSet' of the IProfiles.
/// </summary>
/// <param name="seq">an input sequence</param>
/// <param name="weight">sequence weight, forwarded to the profile generator</param>
/// <returns>a new profile alignment whose sequence count is 1</returns>
public static IProfileAlignment GenerateProfileAlignment(ISequence seq, float weight)
{
    IProfiles singleSequenceProfiles = Profiles.GenerateProfiles(seq, weight);

    IProfileAlignment alignment = new ProfileAlignment();
    alignment.ProfilesMatrix = singleSequenceProfiles;
    alignment.NumberOfSequences = 1;

    return alignment;
}
/// <summary>
/// Creates an IProfileAlignment for a set of aligned sequences.
/// </summary>
/// <param name="sequences">aligned sequences</param>
/// <param name="weights">sequence weights, handed to the profile generator unchanged</param>
/// <returns>the newly created profile alignment</returns>
public static IProfileAlignment GenerateProfileAlignment(ICollection<ISequence> sequences, float[] weights)
{
    IProfiles profiles = Profiles.GenerateProfiles(sequences, weights);

    IProfileAlignment result = new ProfileAlignment();
    result.ProfilesMatrix = profiles;
    result.NumberOfSequences = sequences.Count;

    return result;
}
/// <summary>
/// Creates a detached node: no parent, no children, an empty profile alignment,
/// an empty eString, and flagged as needing re-alignment.
/// </summary>
/// <param name="id">zero-based node ID; also used as the node's initial sequence ID</param>
public BinaryGuideTreeNode(int id)
{
    // Identity and per-node state held in fields.
    _id = id;
    _sequenceID = id;
    _eString = new List<int>();

    // Tree links start out empty.
    LeftChildren = null;
    RightChildren = null;
    Parent = null;

    ProfileAlignment = new ProfileAlignment();
    NeedReAlignment = true;
}
/// <summary>
/// Merges two profile alignments that are already aligned to each other.
/// </summary>
/// <param name="profileAlignmentA">first profile alignment</param>
/// <param name="profileAlignmentB">second profile alignment</param>
/// <returns>
/// a new profile alignment whose sequence count is the sum of both inputs' counts
/// </returns>
public static IProfileAlignment GenerateProfileAlignment(IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
{
    IProfiles mergedProfiles = Profiles.GenerateProfiles(
        profileAlignmentA.ProfilesMatrix,
        profileAlignmentB.ProfilesMatrix,
        profileAlignmentA.NumberOfSequences,
        profileAlignmentB.NumberOfSequences);

    IProfileAlignment merged = new ProfileAlignment();
    merged.ProfilesMatrix = mergedProfiles;
    merged.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;

    return merged;
}
/// <summary>
/// Extracts the profiles of two sequence subsets from the current multiple alignment.
/// Columns containing no residues, i.e. indels only, are discarded.
///
/// Used during alignment refinement: when the guide tree is cut in two, the sequences
/// (leaf nodes) fall into two subsets, and this method produces one profile alignment
/// per subset.
/// </summary>
/// <param name="alignedSequences">a set of aligned sequences</param>
/// <param name="sequenceIndicesA">the subset sequence indices of subtree A</param>
/// <param name="sequenceIndicesB">the subset sequence indices of subtree B</param>
/// <param name="allIndelPositions">
/// receives a two-element array: element 0 is filled by the profile generation for
/// subset A, element 1 by the one for subset B
/// </param>
/// <returns>a two-element array: the profile alignment of subset A, then of subset B</returns>
public static IProfileAlignment[] ProfileExtraction(List<ISequence> alignedSequences, List<int> sequenceIndicesA, List<int> sequenceIndicesB, out List<int>[] allIndelPositions)
{
    allIndelPositions = new List<int>[2];

    IProfiles subsetProfilesA = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesA, out allIndelPositions[0]);
    IProfiles subsetProfilesB = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesB, out allIndelPositions[1]);

    IProfileAlignment subsetA = new ProfileAlignment();
    IProfileAlignment subsetB = new ProfileAlignment();

    subsetA.ProfilesMatrix = subsetProfilesA;
    subsetB.ProfilesMatrix = subsetProfilesB;

    subsetA.NumberOfSequences = sequenceIndicesA.Count;
    subsetB.NumberOfSequences = sequenceIndicesB.Count;

    IProfileAlignment[] extracted = new IProfileAlignment[2];
    extracted[0] = subsetA;
    extracted[1] = subsetB;
    return extracted;
}
/// <summary>
/// Merges two profile alignments using the alignment operation arrays produced by
/// dynamic programming. The two arrays describe the operations to apply to each of the
/// original profiles; the combined, aligned profile is stored in a newly created
/// profile alignment.
/// </summary>
/// <param name="profileAlignmentA">first profile alignment</param>
/// <param name="profileAlignmentB">second profile alignment</param>
/// <param name="aAligned">aligned integer array generated by dynamic programming</param>
/// <param name="bAligned">aligned integer array generated by dynamic programming</param>
/// <param name="gapCode">the gap integer code defined in dynamic programming class</param>
/// <returns>
/// a new profile alignment whose sequence count is the sum of both inputs' counts
/// </returns>
public static IProfileAlignment GenerateProfileAlignment(
    IProfileAlignment profileAlignmentA,
    IProfileAlignment profileAlignmentB,
    int[] aAligned,
    int[] bAligned,
    int gapCode)
{
    IProfiles combinedProfiles = Profiles.GenerateProfiles(
        profileAlignmentA.ProfilesMatrix,
        profileAlignmentB.ProfilesMatrix,
        profileAlignmentA.NumberOfSequences,
        profileAlignmentB.NumberOfSequences,
        aAligned,
        bAligned,
        gapCode);

    IProfileAlignment combined = new ProfileAlignment();
    combined.ProfilesMatrix = combinedProfiles;
    combined.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;

    return combined;
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <remarks>
/// Stage 1 builds a k-mer distance matrix, clusters it into a guide tree and runs a
/// progressive alignment. Stage 2 (only when <c>UseStageB</c> is set) rebuilds the tree
/// from a Kimura distance matrix of the current alignment and aligns again. Stage 3 cuts
/// the tree edge by edge, re-aligns the two resulting profiles and keeps the result when
/// the score improves. Stages 2 and 3 are skipped when <c>FasterVersion</c> is set.
/// The best alignment and its score are stored in the instance fields
/// (<c>_alignedSequences</c>, <c>_alignmentScore</c> and the per-stage A/B/C fields).
/// </remarks>
/// <param name="inputSequences">the sequences to align; enumerated once into a list</param>
/// <returns>
/// Always an empty list. It exists only so that pairwise and multiple alignment share
/// the same output type; the actual alignment is exposed through the instance state.
/// </returns>
/// <exception cref="ArgumentException">the configured profile aligner name is not recognised</exception>
public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    List<ISequence> sequences = inputSequences.ToList();

    // Initializations
    if (sequences.Count > 0)
    {
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_alphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _alphabet;
        }
    }

    // Get ProfileAligner ready
    IProfileAligner profileAligner = null;
    switch (_profileAlignerName)
    {
        case ProfileAlignerNames.NeedlemanWunschProfileAligner:
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        case ProfileAlignerNames.SmithWatermanProfileAligner:
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    _alignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    Performance.Snapshot("Stage 1");

    // Generate DistanceMatrix
    KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarcicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

    // Generate Guide Tree
    BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > _alignmentScoreA)
    {
        _alignmentScoreA = currentScore;
        _alignedSequencesA = progressiveAlignerA.AlignedSequences;
    }
    if (_alignmentScoreA > _alignmentScore)
    {
        _alignmentScore = _alignmentScoreA;
        _alignedSequences = _alignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Stages 2 and 3 are skipped; their results mirror stage 1.
        _alignedSequencesB = _alignedSequencesA;
        _alignedSequencesC = _alignedSequencesA;
        _alignmentScoreB = _alignmentScoreA;
        _alignmentScoreC = _alignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarcicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (PAMSAMMultipleSequenceAligner.UseStageB)
        {
            // STAGE 2
            Performance.Snapshot("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment.
            // This stage runs exactly once (the former loop always exited after one pass).
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

            // Hierarchical clustering
            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreB)
            {
                _alignmentScoreB = currentScore;
                _alignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (_alignmentScoreB > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreB;
                _alignedSequences = _alignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        Performance.Snapshot("Stage 3");

        // refinement
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Work on a copy so the refinement does not modify the current best alignment.
        _alignedSequencesC = new List<ISequence>(sequences.Count);
        for (int i = 0; i < sequences.Count; ++i)
        {
            _alignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), _alignedSequences[i].ToArray())
                {
                    ID = _alignedSequences[i].ID,
                    Metadata = _alignedSequences[i].Metadata
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            Performance.Snapshot("Refinement iter " + refinementTime.ToString());
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    _alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed first; the eStrings are
                // mapped back so that index 0/1 still refers to subset 0/1.
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    List<int> removedColumns = allIndelPositions[set];
                    List<int> subsetEString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                    {
                        ISequence current = _alignedSequencesC[i];

                        // Drop the columns that were removed while extracting the profile.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < removedColumns.Count && j == removedColumns[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        ISequence realigned = profileAligner.GenerateSequenceFromEString(
                            subsetEString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));

                        // Carry ID and metadata over from the sequence being replaced. Reading
                        // them back from the list slot after the overwrite would only copy the
                        // new sequence's values onto itself.
                        realigned.ID = current.ID;
                        Sequence realignedSequence = realigned as Sequence;
                        if (realignedSequence != null)
                        {
                            realignedSequence.Metadata = current.Metadata;
                        }

                        _alignedSequencesC[i] = realigned;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    _alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > _alignmentScoreC)
                {
                    _alignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                    break;
                }
            }

            if (!needRefinement)
            {
                break;
            }
        }

        if (_alignmentScoreC > _alignmentScore)
        {
            _alignmentScore = _alignmentScoreC;
            _alignedSequences = _alignedSequencesC;
        }

        Performance.Snapshot("Stop Stage 3");
    }

    // just for the purpose of integrating PW and MSA with the same output
    return new List<Bio.Algorithms.Alignment.ISequenceAlignment>();
}
/// <summary>
/// Joins two profile alignments into a single one, assuming they are aligned already.
/// </summary>
/// <param name="profileAlignmentA">first profile alignment</param>
/// <param name="profileAlignmentB">second profile alignment</param>
/// <returns>
/// a new profile alignment holding the combined profiles; its sequence count is the
/// sum of the two inputs' sequence counts
/// </returns>
public static IProfileAlignment GenerateProfileAlignment(IProfileAlignment profileAlignmentA, IProfileAlignment profileAlignmentB)
{
    IProfiles joinedProfiles = Profiles.GenerateProfiles(
        profileAlignmentA.ProfilesMatrix,
        profileAlignmentB.ProfilesMatrix,
        profileAlignmentA.NumberOfSequences,
        profileAlignmentB.NumberOfSequences);

    IProfileAlignment joined = new ProfileAlignment();
    joined.ProfilesMatrix = joinedProfiles;
    joined.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;

    return joined;
}
/// <summary>
/// Extracts the profiles of two sequence subsets from the current multiple alignment.
/// Columns containing no residues, i.e. indels only, are discarded.
///
/// Used during alignment refinement: cutting the guide tree splits the sequences
/// (leaf nodes) into two subsets, and one profile alignment is generated per subset.
/// </summary>
/// <param name="alignedSequences">a set of aligned sequences</param>
/// <param name="sequenceIndicesA">the subset sequence indices of subtree A</param>
/// <param name="sequenceIndicesB">the subset sequence indices of subtree B</param>
/// <param name="allIndelPositions">
/// receives a two-element array: element 0 is filled while generating the profiles of
/// subset A, element 1 while generating those of subset B
/// </param>
/// <returns>a two-element array: the profile alignment of subset A, then of subset B</returns>
public static IProfileAlignment[] ProfileExtraction(IList<ISequence> alignedSequences, IList<int> sequenceIndicesA, IList<int> sequenceIndicesB, out List<int>[] allIndelPositions)
{
    allIndelPositions = new List<int>[2];

    IProfiles profilesOfA = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesA, out allIndelPositions[0]);
    IProfiles profilesOfB = Profiles.GenerateProfiles(alignedSequences, sequenceIndicesB, out allIndelPositions[1]);

    IProfileAlignment alignmentOfA = new ProfileAlignment();
    IProfileAlignment alignmentOfB = new ProfileAlignment();

    alignmentOfA.ProfilesMatrix = profilesOfA;
    alignmentOfB.ProfilesMatrix = profilesOfB;

    alignmentOfA.NumberOfSequences = sequenceIndicesA.Count;
    alignmentOfB.NumberOfSequences = sequenceIndicesB.Count;

    IProfileAlignment[] pair = new IProfileAlignment[2];
    pair[0] = alignmentOfA;
    pair[1] = alignmentOfB;
    return pair;
}
/// <summary>
/// Merges two profile alignments using the alignment operation arrays produced by
/// dynamic programming. The two arrays describe the operations to apply to each of the
/// original profiles; the combined, aligned profile is stored in a newly created
/// profile alignment.
/// </summary>
/// <param name="profileAlignmentA">first profile alignment</param>
/// <param name="profileAlignmentB">second profile alignment</param>
/// <param name="aAligned">aligned integer array generated by dynamic programming</param>
/// <param name="bAligned">aligned integer array generated by dynamic programming</param>
/// <param name="gapCode">the gap integer code defined in dynamic programming class</param>
/// <param name="weights">the weights of two profileAlignments</param>
/// <returns>
/// a new profile alignment whose sequence count is the sum of both inputs' counts
/// </returns>
public static IProfileAlignment GenerateProfileAlignment(
    IProfileAlignment profileAlignmentA,
    IProfileAlignment profileAlignmentB,
    int[] aAligned,
    int[] bAligned,
    int gapCode,
    float[] weights)
{
    IProfiles weightedProfiles = Profiles.GenerateProfiles(
        profileAlignmentA.ProfilesMatrix,
        profileAlignmentB.ProfilesMatrix,
        profileAlignmentA.NumberOfSequences,
        profileAlignmentB.NumberOfSequences,
        aAligned,
        bAligned,
        gapCode,
        weights);

    IProfileAlignment combined = new ProfileAlignment();
    combined.ProfilesMatrix = weightedProfiles;
    combined.NumberOfSequences = profileAlignmentA.NumberOfSequences + profileAlignmentB.NumberOfSequences;

    return combined;
}
/// <summary>
/// Wraps one single sequence in a profile alignment.
/// The set of sequence items of the sequence should be the same as the
/// 'static ItemSet' of the IProfiles.
/// </summary>
/// <param name="seq">an input sequence</param>
/// <param name="weight">sequence weight, handed to the profile generator unchanged</param>
/// <returns>a new profile alignment with a sequence count of 1</returns>
public static IProfileAlignment GenerateProfileAlignment(ISequence seq, float weight)
{
    IProfiles profiles = Profiles.GenerateProfiles(seq, weight);

    IProfileAlignment result = new ProfileAlignment();
    result.ProfilesMatrix = profiles;
    result.NumberOfSequences = 1;

    return result;
}
/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// </summary>
/// <remarks>
/// Stage 1 builds a k-mer distance matrix, clusters it into a guide tree and runs a
/// progressive alignment. Stage 2 (only when <c>UseStageB</c> is set) rebuilds the tree
/// from a Kimura distance matrix of the current alignment and aligns again. Stage 3 cuts
/// the tree edge by edge, re-aligns the two resulting profiles and keeps the result when
/// the score improves. Stages 2 and 3 are skipped when <c>FasterVersion</c> is set.
/// Nothing is returned: the best alignment and its score are stored in
/// <c>AlignedSequences</c> / <c>AlignmentScore</c> and the per-stage A/B/C members.
/// </remarks>
/// <param name="sequences">Input sequences; asserted to be non-empty in debug builds</param>
/// <exception cref="ArgumentException">the configured profile aligner name is not recognised</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case ProfileAlignerNames.NeedlemanWunschProfileAligner:
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        case ProfileAlignerNames.SmithWatermanProfileAligner:
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarcicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }
    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Stages 2 and 3 are skipped; their results mirror stage 1.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarcicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (UseStageB)
        {
            // STAGE 2
            ReportLog("Stage 2");

            // Generate DistanceMatrix from Multiple Sequence Alignment.
            // This stage runs exactly once (the former loop always exited after one pass).
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

            // Hierarchical clustering
            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreB)
            {
                this.AlignmentScoreB = currentScore;
                this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (this.AlignmentScoreB > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreB;
                this.AlignedSequences = this.AlignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        ReportLog("Stage 3");

        // refinement
        int maxRefineMentTime = 1;
        if (sequences.Count == 2)
        {
            maxRefineMentTime = 0;
        }

        int refinementTime = 0;

        // Work on a copy so the refinement does not modify the current best alignment.
        this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
        foreach (ISequence t in this.AlignedSequences)
        {
            // Metadata is deliberately not copied: do not shallow copy the dictionary.
            this.AlignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                {
                    ID = t.ID
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            ReportLog("Refinement iter " + refinementTime);
            bool needRefinement = false;

            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed first; the eStrings are
                // mapped back so that index 0/1 still refers to subset 0/1.
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    List<int> removedColumns = allIndelPositions[set];
                    List<int> subsetEString = eStrings[set];

                    Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                    {
                        ISequence current = this.AlignedSequencesC[i];

                        // Drop the columns that were removed while extracting the profile.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < removedColumns.Count && j == removedColumns[indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        ISequence realigned = profileAligner.GenerateSequenceFromEString(
                            subsetEString,
                            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                        // Carry the ID over from the sequence being replaced. Reading it back
                        // from the list slot after the overwrite would only copy the new
                        // sequence's ID onto itself. Metadata is deliberately not copied
                        // (do not shallow copy the dictionary).
                        realigned.ID = current.ID;

                        this.AlignedSequencesC[i] = realigned;
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                if (currentScore > this.AlignmentScoreC)
                {
                    this.AlignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                    break;
                }
            }

            if (!needRefinement)
            {
                break;
            }
        }

        if (this.AlignmentScoreC > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreC;
            this.AlignedSequences = this.AlignedSequencesC;
        }

        ReportLog("Stop Stage 3");
    }
}
/// <summary>
/// Main progressive alignment algorithm: aligns a set of sequences guided by
/// a binary tree.
/// </summary>
/// <remarks>
/// A profile is generated for every leaf node, then the internal nodes are visited in
/// tree order (the last node is the root) and the profiles of each node's two children
/// are aligned. Finally every input sequence is converted to its aligned form by
/// applying the eStrings found on the path from its leaf up to the root; the result is
/// stored in <c>_alignedSequences</c>, in input order.
/// </remarks>
/// <param name="sequences">input sequences</param>
/// <param name="tree">a binary guide tree</param>
/// <exception cref="ArgumentException">
/// <paramref name="sequences"/> is empty, or the sequences do not share the same base
/// alphabet. The alphabet check runs inside a Parallel.For body, so that failure is
/// expected to reach the caller wrapped in an AggregateException.
/// </exception>
public void Align(IList<ISequence> sequences, BinaryGuideTree tree)
{
    // Reject empty input before doing any work on the tree.
    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty set of sequences");
    }

    SequenceWeighting sequenceWeighting = null;
    if (PAMSAMMultipleSequenceAligner.UseWeights)
    {
        sequenceWeighting = new SequenceWeighting(tree);
    }

    IAlphabet alphabet = sequences[0].Alphabet;

    Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
    {
        if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
        {
            throw new ArgumentException("Inconsistent sequence alphabet");
        }
    });

    // Generate profile for leaf nodes
    if (PAMSAMMultipleSequenceAligner.UseWeights)
    {
        Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
        {
            tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
            tree.Nodes[i].Weight = sequenceWeighting.Weights[i];
        });
    }
    else
    {
        Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
        {
            tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
        });
    }

    // Iterate internal nodes;
    // as defined in the tree, the last node is the root
    for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
    {
        if (!tree.Nodes[i].NeedReAlignment)
        {
            continue;
        }

        // pull out its children
        _nodeA = tree.Nodes[i].LeftChildren;
        _nodeB = tree.Nodes[i].RightChildren;

        if (PAMSAMMultipleSequenceAligner.UseWeights)
        {
            _profileAligner.Weights = new float[2];
            _profileAligner.Weights[0] = _nodeA.Weight;
            _profileAligner.Weights[1] = _nodeB.Weight;

            tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
        }

        // align two profiles; the one with fewer sequences is passed first, and the
        // eStrings are mapped back to the matching child afterwards
        ProfileAlignment result = null;
        if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
        {
            result = (ProfileAlignment)_profileAligner.Align(_nodeA.ProfileAlignment, _nodeB.ProfileAlignment);

            // assign aligned profiles to the current internal node
            tree.Nodes[i].ProfileAlignment = result;

            // generate eString for the children nodes
            _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
            _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
        }
        else
        {
            result = (ProfileAlignment)_profileAligner.Align(_nodeB.ProfileAlignment, _nodeA.ProfileAlignment);

            // assign aligned profiles to the current internal node
            tree.Nodes[i].ProfileAlignment = result;

            // generate eString for the children nodes
            _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
            _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
        }

        // children node profiles can be deleted
        _nodeA.ProfileAlignment.Clear();
        _nodeB.ProfileAlignment.Clear();
    }

    // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
    try
    {
        _alignedSequences = new List<ISequence>(sequences.Count);
    }
    catch (OutOfMemoryException ex)
    {
        // Keep the caught exception itself as the inner exception so the real cause
        // and its stack trace are not lost.
        throw new Exception("Out of memory", ex);
    }

    // Pre-fill with placeholders so the parallel loop below can assign by index.
    for (int i = 0; i < sequences.Count; ++i)
    {
        _alignedSequences.Add(null);
    }

    Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
    {
        ISequence seq = sequences[i];
        BinaryGuideTreeNode node = tree.Nodes[i];

        // Walk from the leaf towards the root, applying each node's eString on the way.
        while (!node.IsRoot)
        {
            seq = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
            node = node.Parent;
        }

        _alignedSequences[i] = seq;
    });
}
/// <summary>
/// Initializes a stand-alone node with the given ID. The node starts with no parent and
/// no children, a fresh profile alignment, an empty eString, and is marked as needing
/// re-alignment.
/// </summary>
/// <param name="id">zero-based node ID; the sequence ID is initialized to the same value</param>
public BinaryGuideTreeNode(int id)
{
    // Backing fields first: identity and the (initially empty) eString.
    _sequenceID = id;
    _id = id;
    _eString = new List<int>();

    // No links to other nodes yet.
    LeftChildren = null;
    RightChildren = null;
    Parent = null;

    ProfileAlignment = new ProfileAlignment();
    NeedReAlignment = true;
}