/// <summary>
/// Creates the controller and stores the supplied collaborators on the
/// corresponding properties.
/// </summary>
/// <param name="parserManager">Value stored in <c>ParserManager</c>.</param>
/// <param name="hierarchicalClustering">Value stored in <c>HierarchicalClustering</c>.</param>
/// <param name="clusterSerializer">Value stored in <c>ClusterSerializer</c>.</param>
public HomeController(
    IParserManager parserManager,
    IHierarchicalClustering hierarchicalClustering,
    IClusterSerializer clusterSerializer)
{
    this.ParserManager = parserManager;
    this.HierarchicalClustering = hierarchicalClustering;
    this.ClusterSerializer = clusterSerializer;
}
/// <summary>
/// Construct a tree from the result of a hierarchical clustering run.
///
/// The node and edge lists are taken directly from the clustering object,
/// and the last node in its node list is used as the root.
/// </summary>
/// <param name="hCluster">Hierarchical clustering object supplying the nodes and edges.</param>
/// <exception cref="ArgumentNullException"><paramref name="hCluster"/> is null.</exception>
/// <exception cref="ArgumentException">The clustering object's node list is null or empty.</exception>
public BinaryGuideTree(IHierarchicalClustering hCluster)
{
    if (hCluster == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch ArgumentException keep working.
        throw new ArgumentNullException("hCluster", "null Hierarchical clustering class");
    }

    // Read the node list once instead of going through the property repeatedly.
    var nodes = hCluster.Nodes;
    if (nodes == null || nodes.Count == 0)
    {
        throw new ArgumentException("empty node list in Hierarchical clustering class");
    }

    _nodes = nodes;
    _edges = hCluster.Edges;

    // The root is taken to be the last node in the list.
    _root = nodes[nodes.Count - 1];
    _numberOfNodes = nodes.Count;

    // Integer division: for a node count of 2n - 1 (a full binary tree with
    // n leaves) this evaluates to n.
    _numberOfLeaves = (_numberOfNodes + 2) / 2;
}
/// <summary>
/// Runs the three alignment stages on the input sequences:
/// Stage 1 builds a guide tree from k-mer distances and aligns progressively;
/// Stage 2 (when UseStageB is set) rebuilds the tree from Kimura distances of the
/// current alignment and aligns again; Stage 3 refines the alignment by cutting
/// the tree at each edge and re-aligning the two resulting profiles.
/// Stages 2 and 3 are skipped when FasterVersion is set.
///
/// The best-scoring alignment and the per-stage alignments/scores are stored in
/// the instance fields (_alignedSequences, _alignedSequencesA/B/C and the
/// matching score fields).
/// </summary>
/// <param name="inputSequences">Sequences to align.</param>
/// <returns>
/// A newly created list. NOTE(review): nothing is added to it in this method, so
/// it is always empty; callers have to read the aligned sequences from the instance.
/// </returns>
/// <exception cref="ArgumentException">The configured profile aligner name is not recognised.</exception>
public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    List<ISequence> sequences = inputSequences.ToList();

    // Initializations
    if (sequences.Count > 0)
    {
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_alphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _alphabet;
        }
    }

    // Get ProfileAligner ready: the serial implementation is used only when the
    // degree of parallelism is exactly 1.
    IProfileAligner profileAligner = null;
    switch (_profileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (_degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    _alignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    Performance.Snapshot("Stage 1");

    // Generate DistanceMatrix
    KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarcicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

    // Generate Guide Tree
    BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > _alignmentScoreA)
    {
        _alignmentScoreA = currentScore;
        _alignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (_alignmentScoreA > _alignmentScore)
    {
        _alignmentScore = _alignmentScoreA;
        _alignedSequences = _alignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Fast path: stages 2 and 3 are skipped and simply mirror the stage 1 result.
        _alignedSequencesB = _alignedSequencesA;
        _alignedSequencesC = _alignedSequencesA;
        _alignmentScoreB = _alignmentScoreA;
        _alignmentScoreC = _alignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarcicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (PAMSAMMultipleSequenceAligner.UseStageB)
        {
            // STAGE 2
            Performance.Snapshot("Stage 2");

            // This stage runs exactly once: the previous while(true) loop broke out
            // on every path, so it is written as straight-line code.

            // Generate DistanceMatrix from Multiple Sequence Alignment
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

            // Hierarchical clustering
            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreB)
            {
                _alignmentScoreB = currentScore;
                _alignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (_alignmentScoreB > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreB;
                _alignedSequences = _alignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        Performance.Snapshot("Stage 3");

        // Refinement: at most one pass, and none at all for exactly two sequences.
        int maxRefineMentTime = sequences.Count == 2 ? 0 : 1;
        int refinementTime = 0;

        // Work on copies so refinement does not modify the current best alignment.
        _alignedSequencesC = new List<ISequence>(sequences.Count);
        for (int i = 0; i < sequences.Count; ++i)
        {
            _alignedSequencesC.Add(
                new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), _alignedSequences[i].ToArray())
                {
                    ID = _alignedSequences[i].ID,
                    Metadata = _alignedSequences[i].Metadata
                });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            Performance.Snapshot("Refinement iter " + refinementTime.ToString());

            bool needRefinement = false;
            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cut the tree at this edge to get the two sets of leaf indices.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    _alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed as the first argument;
                // the e-strings are stored back in the original set order.
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    // Local copy so the lambda does not capture the loop counter itself.
                    int currentSet = set;
                    Parallel.ForEach(leafNodeIndices[currentSet], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                    {
                        var current = _alignedSequencesC[i];

                        // Capture ID and metadata BEFORE the slot is overwritten; the
                        // previous code read them back from the replacement sequence,
                        // which was a self-assignment and lost the original values.
                        var originalId = current.ID;
                        var originalMetadata = current.Metadata;

                        // Copy every symbol except those at the recorded indel positions.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < allIndelPositions[currentSet].Count
                                && j == allIndelPositions[currentSet][indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        _alignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(
                            eStrings[currentSet],
                            new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));
                        _alignedSequencesC[i].ID = originalId;

                        // Metadata can only be set on the concrete Sequence type.
                        Sequence refined = _alignedSequencesC[i] as Sequence;
                        if (refined != null)
                        {
                            refined.Metadata = originalMetadata;
                        }
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    _alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);
                if (currentScore > _alignmentScoreC)
                {
                    _alignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree from the improved alignment and stop scanning edges
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                    break;
                }
            }

            if (!needRefinement)
            {
                // No edge improved the score: stop refining.
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (_alignmentScoreC > _alignmentScore)
        {
            _alignmentScore = _alignmentScoreC;
            _alignedSequences = _alignedSequencesC;
        }

        Performance.Snapshot("Stop Stage 3");
    }

    //just for the purpose of integrating PW and MSA with the same output
    IList<Bio.Algorithms.Alignment.ISequenceAlignment> results =
        new List<Bio.Algorithms.Alignment.ISequenceAlignment>();
    return results;
}
/// <summary>
/// Runs the three alignment stages on the input sequences:
/// Stage 1 builds a guide tree from k-mer distances and aligns progressively;
/// Stage 2 (when UseStageB is set) rebuilds the tree from Kimura distances of the
/// current alignment and aligns again; Stage 3 refines the alignment by cutting
/// the tree at each edge and re-aligning the two resulting profiles.
/// Stages 2 and 3 are skipped when FasterVersion is set.
///
/// Nothing is returned: the best-scoring alignment is stored in AlignedSequences /
/// AlignmentScore, and the per-stage results in AlignedSequencesA/B/C and
/// AlignmentScoreA/B/C.
/// </summary>
/// <param name="sequences">Input sequences; asserted to be non-empty in debug builds.</param>
/// <exception cref="ArgumentException">ProfileAlignerName is not a recognised aligner.</exception>
private void DoAlignment(IList<ISequence> sequences)
{
    Debug.Assert(this.alphabet != null);
    Debug.Assert(sequences.Count > 0);

    // Initializations
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = this.alphabet;
    }

    // Get ProfileAligner ready: the serial implementation is used only when the
    // degree of parallelism is exactly 1.
    IProfileAligner profileAligner = null;
    switch (ProfileAlignerName)
    {
        case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new NeedlemanWunschProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new NeedlemanWunschProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        case (ProfileAlignerNames.SmithWatermanProfileAligner):
            if (this.degreeOfParallelism == 1)
            {
                profileAligner = new SmithWatermanProfileAlignerSerial(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            else
            {
                profileAligner = new SmithWatermanProfileAlignerParallel(
                    SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
            }
            break;
        default:
            throw new ArgumentException("Invalid profile aligner name");
    }

    this.AlignedSequences = new List<ISequence>(sequences.Count);
    float currentScore = 0;

    // STAGE 1
    ReportLog("Stage 1");

    // Generate DistanceMatrix
    var kmerDistanceMatrixGenerator =
        new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

    // Hierarchical clustering
    IHierarchicalClustering hierarcicalClustering =
        new HierarchicalClusteringParallel(kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

    // Generate Guide Tree
    var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

    // Progressive Alignment
    IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
    progressiveAlignerA.Align(sequences, binaryGuideTree);

    currentScore = MsaUtils.MultipleAlignmentScoreFunction(
        progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
    if (currentScore > this.AlignmentScoreA)
    {
        this.AlignmentScoreA = currentScore;
        this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
    }

    if (this.AlignmentScoreA > this.AlignmentScore)
    {
        this.AlignmentScore = this.AlignmentScoreA;
        this.AlignedSequences = this.AlignedSequencesA;
    }

    if (PAMSAMMultipleSequenceAligner.FasterVersion)
    {
        // Fast path: stages 2 and 3 are skipped and simply mirror the stage 1 result.
        this.AlignedSequencesB = this.AlignedSequencesA;
        this.AlignedSequencesC = this.AlignedSequencesA;
        this.AlignmentScoreB = this.AlignmentScoreA;
        this.AlignmentScoreC = this.AlignmentScoreA;
    }
    else
    {
        BinaryGuideTree binaryGuideTreeB = null;
        IHierarchicalClustering hierarcicalClusteringB = null;
        KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

        if (UseStageB)
        {
            // STAGE 2
            ReportLog("Stage 2");

            // This stage runs exactly once: the previous while(true) loop always
            // ended in an unconditional break, so it is written as straight-line code.

            // Generate DistanceMatrix from Multiple Sequence Alignment
            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

            // Hierarchical clustering
            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
            BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
            binaryGuideTree = binaryGuideTreeB;

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
            progressiveAlignerB.Align(sequences, binaryGuideTreeB);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreB)
            {
                this.AlignmentScoreB = currentScore;
                this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
            }

            if (this.AlignmentScoreB > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreB;
                this.AlignedSequences = this.AlignedSequencesB;
            }
        }
        else
        {
            binaryGuideTreeB = binaryGuideTree;
        }

        // STAGE 3
        ReportLog("Stage 3");

        // Refinement: at most one pass, and none at all for exactly two sequences.
        int maxRefineMentTime = sequences.Count == 2 ? 0 : 1;
        int refinementTime = 0;

        // Work on copies so refinement does not modify the current best alignment.
        this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
        foreach (ISequence t in this.AlignedSequences)
        {
            this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
            {
                ID = t.ID,
                // Do not shallow copy dictionary
                //Metadata = t.Metadata
            });
        }

        while (refinementTime < maxRefineMentTime)
        {
            ++refinementTime;
            ReportLog("Refinement iter " + refinementTime);

            bool needRefinement = false;
            for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
            {
                // Cut the tree at this edge to get the two sets of leaf indices.
                List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                List<int>[] allIndelPositions;
                IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(
                    this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);

                // The profile with fewer sequences is passed as the first argument;
                // the e-strings are stored back in the original set order.
                List<int>[] eStrings = new List<int>[2];
                if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                {
                    profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                }
                else
                {
                    profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                    eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                    eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                }

                for (int set = 0; set < 2; ++set)
                {
                    // Local copy so the lambda does not capture the loop counter itself.
                    int currentSet = set;
                    Parallel.ForEach(leafNodeIndices[currentSet], ParallelOption, i =>
                    {
                        var current = this.AlignedSequencesC[i];

                        // Capture the ID BEFORE the slot is overwritten; the previous code
                        // read it back from the replacement sequence, which was a
                        // self-assignment and lost the original ID.
                        var originalId = current.ID;

                        // Copy every symbol except those at the recorded indel positions.
                        List<byte> seqBytes = new List<byte>();
                        int indexAllIndel = 0;
                        for (int j = 0; j < current.Count; ++j)
                        {
                            if (indexAllIndel < allIndelPositions[currentSet].Count
                                && j == allIndelPositions[currentSet][indexAllIndel])
                            {
                                ++indexAllIndel;
                            }
                            else
                            {
                                seqBytes.Add(current[j]);
                            }
                        }

                        this.AlignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(
                            eStrings[currentSet],
                            new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));
                        this.AlignedSequencesC[i].ID = originalId;

                        // Do not shallow copy dictionary: metadata is intentionally not carried over.
                    });
                }

                currentScore = MsaUtils.MultipleAlignmentScoreFunction(
                    this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);
                if (currentScore > this.AlignmentScoreC)
                {
                    this.AlignmentScoreC = currentScore;
                    needRefinement = true;

                    // recreate the tree from the improved alignment and stop scanning edges
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                    break;
                }
            }

            if (!needRefinement)
            {
                // No edge improved the score: stop refining.
                refinementTime = maxRefineMentTime;
                break;
            }
        }

        if (this.AlignmentScoreC > this.AlignmentScore)
        {
            this.AlignmentScore = this.AlignmentScoreC;
            this.AlignedSequences = this.AlignedSequencesC;
        }

        ReportLog("Stop Stage 3");
    }
}
/// <summary>
/// Builds a binary guide tree from the given hierarchical clustering object.
/// </summary>
/// <param name="hierarchicalClustering">Clustering object passed to the BinaryGuideTree constructor.</param>
/// <returns>The newly constructed guide tree.</returns>
private static BinaryGuideTree GetBinaryTree(IHierarchicalClustering hierarchicalClustering)
{
    return new BinaryGuideTree(hierarchicalClustering);
}