/// <summary>
/// Performs Stage 1, 2, and 3 as described in class description.
/// Any results held by this instance from an earlier run are cleared first,
/// so the same aligner instance can be used multiple times.
/// </summary>
/// <param name="inputSequences">Input sequences; must be non-null and contain at least one sequence</param>
/// <returns>
/// A list holding a single alignment. That alignment carries the input
/// sequences plus one aligned-sequence entry containing every item of
/// <c>AlignedSequences</c>.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="inputSequences"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="inputSequences"/> contains no sequences.</exception>
public IList<Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
{
    // Reset all our data in case this same instance is used multiple times.
    _alignedSequences = _alignedSequencesA = _alignedSequencesB = _alignedSequencesC = null;
    _alignmentScore = _alignmentScoreA = _alignmentScoreB = _alignmentScoreC = float.MinValue;

    // Fail with the caller-visible parameter name rather than letting
    // ToList() report its own internal "source" parameter.
    if (inputSequences == null)
    {
        throw new ArgumentNullException("inputSequences");
    }

    // Materialize once: the input is enumerated several times below.
    List<ISequence> sequences = inputSequences.ToList();
    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // Assign the gap open/extension cost if it hasn't been assigned.
    // A value of 0 is treated as "not assigned".
    if (GapOpenCost == 0)
    {
        GapOpenCost = -4;
    }

    if (GapExtensionCost == 0)
    {
        GapExtensionCost = -1;
    }

    Performance.Start();

    // Assign the alphabet.
    // NOTE(review): the meaning of the trailing 'true' is defined by SetAlphabet,
    // which is not visible here - confirm against its declaration.
    SetAlphabet(sequences, SimilarityMatrix, true);
    MsaUtils.SetProfileItemSets(_alphabet);

    Performance.Snapshot("Start Aligning");

    // Work...
    DoAlignment(sequences);

    // Just for the purpose of integrating PW and MSA with the same output:
    // wrap the aligned sequences and the inputs in a single SequenceAlignment.
    var alignment = new Alignment.SequenceAlignment();
    IAlignedSequence aSequence = new AlignedSequence();

    foreach (var alignedSequence in AlignedSequences)
    {
        aSequence.Sequences.Add(alignedSequence);
    }

    foreach (var inputSequence in sequences)
    {
        alignment.Sequences.Add(inputSequence);
    }

    alignment.AlignedSequences.Add(aSequence);

    return new List<Alignment.ISequenceAlignment> { alignment };
}
/// <summary>
/// Construct an aligner, validate the inputs, and run the alignment by
/// calling <c>Align</c> on the supplied sequences.
/// </summary>
/// <param name="sequences">input sequences</param>
/// <param name="kmerLength">positive integer of kmer length</param>
/// <param name="distanceFunctionName">enum: distance function name</param>
/// <param name="hierarchicalClusteringMethodName">enum: cluster update method</param>
/// <param name="profileAlignerMethodName">enum: profile-profile aligner name</param>
/// <param name="profileFunctionName">enum: profile-profile distance function</param>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="gapOpenPenalty">negative gapOpenPenalty</param>
/// <param name="gapExtendPenalty">negative gapExtendPenalty</param>
/// <param name="numberOfPartitions">the number of partitions in dynamic programming</param>
/// <param name="degreeOfParallelism">degree of parallelism option for parallel extension</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sequences"/> or <paramref name="similarityMatrix"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when the sequence list is empty, when <paramref name="degreeOfParallelism"/> or
/// <paramref name="numberOfPartitions"/> is not positive, when the sequences do not share
/// the alphabet base of the first sequence, when the alphabet is not a DNA, RNA or protein
/// alphabet, or when the similarity matrix name is not one accepted for that alphabet.
/// </exception>
public PAMSAMMultipleSequenceAligner(
    IList<ISequence> sequences,
    int kmerLength,
    DistanceFunctionTypes distanceFunctionName,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    ProfileAlignerNames profileAlignerMethodName,
    ProfileScoreFunctionNames profileFunctionName,
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtendPenalty,
    int numberOfPartitions,
    int degreeOfParallelism)
{
    Performance.Start();

    if (null == sequences)
    {
        throw new ArgumentNullException("sequences");
    }

    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    // Set parallel extension option
    if (degreeOfParallelism <= 0)
    {
        throw new ArgumentException("Invalid parallel degree parameter");
    }

    // NOTE(review): parallelOption is assigned through the type name, i.e. it is
    // static, so each construction replaces the setting for all instances - confirm
    // this sharing is intended.
    PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions { MaxDegreeOfParallelism = degreeOfParallelism };

    if (numberOfPartitions <= 0)
    {
        throw new ArgumentException("Invalid number of partition parameter");
    }

    _numberOfPartitions = numberOfPartitions;

    // Validate data type: every sequence must share the alphabet base of the first.
    // This is a plain loop on purpose: an exception thrown inside a Parallel.For body
    // reaches the caller wrapped in an AggregateException, which would hide the
    // ArgumentException that the other validations in this constructor throw directly.
    _alphabet = sequences[0].Alphabet;
    for (int i = 1; i < sequences.Count; i++)
    {
        if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, _alphabet))
        {
            throw new ArgumentException("Inconsistent sequence alphabet");
        }
    }

    // Pick the similarity matrix names accepted for the detected alphabet.
    // The type tests are kept in their original order (DNA, protein, RNA).
    List<String> allowedMatrixNames;
    if (_alphabet is DnaAlphabet)
    {
        allowedMatrixNames = new List<String> { "AmbiguousDNA" };
    }
    else if (_alphabet is ProteinAlphabet)
    {
        allowedMatrixNames = new List<String>
        {
            "BLOSUM45",
            "BLOSUM50",
            "BLOSUM62",
            "BLOSUM80",
            "BLOSUM90",
            "PAM250",
            "PAM30",
            "PAM70"
        };
    }
    else if (_alphabet is RnaAlphabet)
    {
        allowedMatrixNames = new List<String> { "AmbiguousRNA" };
    }
    else
    {
        throw new ArgumentException("Invalid alphabet");
    }

    // Guard the dereference below; without it a null matrix surfaced as a
    // NullReferenceException instead of an argument error.
    if (null == similarityMatrix)
    {
        throw new ArgumentNullException("similarityMatrix");
    }

    if (!allowedMatrixNames.Contains(similarityMatrix.Name))
    {
        throw new ArgumentException("Inconsistent similarity matrix");
    }

    // Initialize parameters
    _kmerLength = kmerLength;
    _distanceFunctionName = distanceFunctionName;
    _hierarchicalClusteringMethodName = hierarchicalClusteringMethodName;
    _profileAlignerName = profileAlignerMethodName;
    _profileProfileFunctionName = profileFunctionName;
    SimilarityMatrix = similarityMatrix;
    GapOpenCost = gapOpenPenalty;
    GapExtensionCost = gapExtendPenalty;

    MsaUtils.SetProfileItemSets(_alphabet);

    Performance.Snapshot("Start Aligning");

    // Work...
    Align(sequences);
}
/// <summary>
/// Builds an aligner from the supplied settings and immediately aligns
/// the given sequences.
/// </summary>
/// <param name="sequences">sequences to align; must be non-null and non-empty</param>
/// <param name="kmerLength">kmer length (positive integer)</param>
/// <param name="distanceFunctionName">which distance function to use</param>
/// <param name="hierarchicalClusteringMethodName">which cluster update method to use</param>
/// <param name="profileAlignerMethodName">which profile-profile aligner to use</param>
/// <param name="profileFunctionName">which profile-profile distance function to use</param>
/// <param name="similarityMatrix">similarity matrix</param>
/// <param name="gapOpenPenalty">gap open penalty (negative)</param>
/// <param name="gapExtendPenalty">gap extension penalty (negative)</param>
/// <param name="numberOfPartitions">number of partitions in dynamic programming; must be positive</param>
/// <param name="degreeOfParallelism">maximum degree of parallelism for parallel extension; must be positive</param>
public PAMSAMMultipleSequenceAligner(
    IList<ISequence> sequences,
    int kmerLength,
    DistanceFunctionTypes distanceFunctionName,
    UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
    ProfileAlignerNames profileAlignerMethodName,
    ProfileScoreFunctionNames profileFunctionName,
    SimilarityMatrix similarityMatrix,
    int gapOpenPenalty,
    int gapExtendPenalty,
    int numberOfPartitions,
    int degreeOfParallelism)
{
    Performance.Start();

    // Guard clauses; the order determines which error a caller sees first.
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (sequences.Count == 0)
    {
        throw new ArgumentException("Empty input sequences");
    }

    if (degreeOfParallelism < 1)
    {
        throw new ArgumentException("Invalid parallel degree parameter");
    }

    // Publish the parallel extension option only after the degree has been validated.
    var options = new ParallelOptions();
    options.MaxDegreeOfParallelism = degreeOfParallelism;
    parallelOption = options;

    if (numberOfPartitions < 1)
    {
        throw new ArgumentException("Invalid number of partition parameter");
    }

    _numberOfPartitions = numberOfPartitions;

    // Alphabet assignment is delegated to SetAlphabet; the final argument is passed
    // as false here (its meaning is defined by SetAlphabet, not shown in this block).
    SetAlphabet(sequences, similarityMatrix, false);

    // Copy the remaining settings onto the instance.
    KmerLength = kmerLength;
    DistanceFunctionName = distanceFunctionName;
    HierarchicalClusteringMethodName = hierarchicalClusteringMethodName;
    ProfileAlignerName = profileAlignerMethodName;
    ProfileProfileFunctionName = profileFunctionName;
    SimilarityMatrix = similarityMatrix;
    GapOpenCost = gapOpenPenalty;
    GapExtensionCost = gapExtendPenalty;

    MsaUtils.SetProfileItemSets(_alphabet);

    Performance.Snapshot("Start Aligning");

    // Run the alignment.
    DoAlignment(sequences);
}