Example #1
0
        /// <summary>
        /// Validates input sequences and gap penalties.
        /// Checks that input sequences use the same alphabet.
        /// Checks that each symbol in the input sequences exists in the similarity matrix.
        /// Checks that gap penalties are less than or equal to 0.
        /// Throws exception if sequences fail these checks.
        /// Writes warning to ApplicationLog if gap penalty or penalties are positive.
        /// </summary>
        /// <param name="inputA">First input sequence.</param>
        /// <param name="inputB">Second input sequence.</param>
        protected void ValidateAlignInput(ISequence inputA, ISequence inputB)
        {
            if (inputA == null)
            {
                throw new ArgumentNullException("inputA");
            }

            if (inputB == null)
            {
                throw new ArgumentNullException("inputB");
            }

            if (!Alphabets.CheckIsFromSameBase(inputA.Alphabet, inputB.Alphabet))
            {
                throw new ArgumentException(Properties.Resource.InputAlphabetsMismatch);
            }

            if (null == this.InternalSimilarityMatrix)
            {
                throw new ArgumentException(Properties.Resource.SimilarityMatrixCannotBeNull);
            }

            if (!this.InternalSimilarityMatrix.ValidateSequence(inputA))
            {
                throw new ArgumentException(Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix);
            }

            if (!this.InternalSimilarityMatrix.ValidateSequence(inputB))
            {
                throw new ArgumentException(Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix);
            }
        }
Example #2
0
        /// <summary>
        /// Build the HTTP content for the request based on the parameters
        /// </summary>
        /// <returns>The request.</returns>
        public HttpContent BuildRequest(BlastRequestParameters blastParams)
        {
            if (string.IsNullOrWhiteSpace(blastParams.Database))
            {
                throw new ArgumentException("Database must be supplied.");
            }

            if (string.IsNullOrWhiteSpace(blastParams.Program))
            {
                throw new ArgumentException("Program must be supplied.");
            }

            if (blastParams.Sequences.Count == 0)
            {
                throw new ArgumentException("Must have at least one sequence.");
            }

            // Check that all sequences are same alphabet
            if (blastParams.Sequences.Count > 1)
            {
                ISequence primary = blastParams.Sequences[0];
                for (int i = 1; i < blastParams.Sequences.Count; i++)
                {
                    if (!Alphabets.CheckIsFromSameBase(primary.Alphabet, blastParams.Sequences[i].Alphabet))
                    {
                        throw new ArgumentException("Sequences must all share the same base alphabet.");
                    }
                }
            }

            var data = new List <KeyValuePair <string, string> > {
                this.CreateKVP("CMD", "Put")
            };

            if (blastParams.Program == BlastProgram.Megablast)
            {
                data.Add(this.CreateKVP("PROGRAM", BlastProgram.Blastn));
                data.Add(this.CreateKVP("MEGABLAST", "ON"));
            }
            else
            {
                data.Add(this.CreateKVP("PROGRAM", blastParams.Program));
            }
            data.Add(this.CreateKVP("DATABASE", blastParams.Database));

            data.AddRange(blastParams.ExtraParameters);

            // Add the sequences.
            StringBuilder sb = new StringBuilder();

            foreach (var seq in blastParams.Sequences)
            {
                sb.Append(seq.ConvertToString());
            }

            data.Add(this.CreateKVP("QUERY", sb.ToString()));

            return(new FormUrlEncodedContent(data));
        }
Example #3
0
        /// <summary>
        /// Sets up fields for the assembly process.
        /// </summary>
        private void Initialize()
        {
            this.currentStep            = 0;
            this.progressTimer          = new Timer(ProgressTimerInterval);
            this.progressTimer.Elapsed += new ElapsedEventHandler(this.ProgressTimerElapsed);

            this.statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InitializingStarted, DateTime.Now);
            this.RaiseStatusEvent();

            // Reset parameters not set by user, based on sequenceReads
            if (this.AllowKmerLengthEstimation)
            {
                this.kmerLength = EstimateKmerLength(this.sequenceReads);
            }
            else
            {
                if (this.kmerLength <= 0)
                {
                    throw new InvalidOperationException(Properties.Resource.KmerLength);
                }

                try
                {
                    if (!Alphabets.CheckIsFromSameBase(this.sequenceReads.First().Alphabet, Alphabets.DNA))
                    {
                        throw new InvalidOperationException(Properties.Resource.CannotAssembleSequenceType);
                    }
                }
                catch (Exception e)
                {
                    if (e.InnerException != null && !string.IsNullOrEmpty(e.InnerException.Message))
                    {
                        throw e.InnerException;
                    }
                    else
                    {
                        throw;
                    }
                }
            }

            if (this.dangleThreshold == -1)
            {
                this.dangleThreshold = this.kmerLength + 1;
            }

            if (this.redundantPathLengthThreshold == -1)
            {
                // Reference for default threshold for redundant path purger:
                // ABySS Release Notes 1.1.2 - "Pop bubbles shorter than N bp. The default is b=3*(k + 1)."
                this.redundantPathLengthThreshold = 3 * (this.kmerLength + 1);
            }

            this.InitializeDefaultGraphModifiers();

            this.statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InitializingEnded, DateTime.Now);
            this.RaiseStatusEvent();
        }
Example #4
0
        /// <summary>
        /// Validates input sequences and gap penalties.
        /// Checks that input sequences use the same alphabet.
        /// Checks that each symbol in the input sequences exists in the similarity matrix.
        /// Checks that gap penalties are less than or equal to 0.
        /// Throws exception if sequences fail these checks.
        /// Writes warning to ApplicationLog if gap penalty or penalties are positive.
        /// </summary>
        /// <param name="inputA">First input sequence.</param>
        /// <param name="inputB">Second input sequence.</param>
        protected void ValidateAlignInput(ISequence inputA, ISequence inputB)
        {
            if (inputA == null)
            {
                throw new ArgumentNullException("inputA");
            }

            if (inputB == null)
            {
                throw new ArgumentNullException("inputB");
            }

            if (!Alphabets.CheckIsFromSameBase(inputA.Alphabet, inputB.Alphabet))
            {
                Trace.Report(Properties.Resource.InputAlphabetsMismatch);
                throw new ArgumentException(Properties.Resource.InputAlphabetsMismatch);
            }

            if (null == internalSimilarityMatrix)
            {
                Trace.Report(Properties.Resource.SimilarityMatrixCannotBeNull);
                throw new ArgumentException(Properties.Resource.SimilarityMatrixCannotBeNull);
            }

            if (!internalSimilarityMatrix.ValidateSequence(inputA))
            {
                Trace.Report(Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix);
                throw new ArgumentException(Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix);
            }

            if (!internalSimilarityMatrix.ValidateSequence(inputB))
            {
                Trace.Report(Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix);
                throw new ArgumentException(Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix);
            }

            // Warning if gap penalty > 0
            if (GapOpenCost > 0)
            {
                ApplicationLog.WriteLine("Gap Open Penalty {0} > 0, possible error", GapOpenCost);
            }

            if (GapExtensionCost > 0)
            {
                ApplicationLog.WriteLine("Gap Extension Penalty {0} > 0, possible error", GapExtensionCost);
            }
        }
Example #5
0
        /// <summary>
        /// Construct an aligner
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="kmerLength">positive integer of kmer length</param>
        /// <param name="distanceFunctionName">enum: distance function name</param>
        /// <param name="hierarchicalClusteringMethodName">enum: cluster update method</param>
        /// <param name="profileAlignerMethodName">enum: profile-profile aligner name</param>
        /// <param name="profileFunctionName">enum: profile-profile distance function</param>
        /// <param name="similarityMatrix">similarity matrix</param>
        /// <param name="gapOpenPenalty">negative gapOpenPenalty</param>
        /// <param name="gapExtendPenalty">negative gapExtendPenalty</param>
        /// <param name="numberOfPartitions">the number of partitions in dynamic programming</param>
        /// <param name="degreeOfParallelism">degree of parallelism option for parallel extension</param>
        public PAMSAMMultipleSequenceAligner(
            IList <ISequence> sequences,
            int kmerLength,
            DistanceFunctionTypes distanceFunctionName,
            UpdateDistanceMethodsTypes hierarchicalClusteringMethodName,
            ProfileAlignerNames profileAlignerMethodName,
            ProfileScoreFunctionNames profileFunctionName,
            SimilarityMatrix similarityMatrix,
            int gapOpenPenalty,
            int gapExtendPenalty,
            int numberOfPartitions,
            int degreeOfParallelism)
        {
            Performance.Start();

            if (null == sequences)
            {
                throw new ArgumentNullException("sequences");
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // Set parallel extension option
            if (degreeOfParallelism <= 0)
            {
                throw new ArgumentException("Invalid parallel degree parameter");
            }
            PAMSAMMultipleSequenceAligner.parallelOption = new ParallelOptions {
                MaxDegreeOfParallelism = degreeOfParallelism
            };

            if (numberOfPartitions <= 0)
            {
                throw new ArgumentException("Invalid number of partition parameter");
            }
            _numberOfPartitions = numberOfPartitions;

            // Validate data type
            _alphabet = sequences[0].Alphabet;
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, _alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            List <String> similarityMatrixDNA = new List <String>();

            similarityMatrixDNA.Add("AmbiguousDNA");

            List <String> similarityMatrixRNA = new List <String>();

            similarityMatrixRNA.Add("AmbiguousRNA");

            List <String> similarityMatrixProtein = new List <String>();

            similarityMatrixProtein.Add("BLOSUM45");
            similarityMatrixProtein.Add("BLOSUM50");
            similarityMatrixProtein.Add("BLOSUM62");
            similarityMatrixProtein.Add("BLOSUM80");
            similarityMatrixProtein.Add("BLOSUM90");
            similarityMatrixProtein.Add("PAM250");
            similarityMatrixProtein.Add("PAM30");
            similarityMatrixProtein.Add("PAM70");

            if (_alphabet is DnaAlphabet)
            {
                if (!similarityMatrixDNA.Contains(similarityMatrix.Name))
                {
                    throw new ArgumentException("Inconsistent similarity matrix");
                }
            }
            else if (_alphabet is ProteinAlphabet)
            {
                if (!similarityMatrixProtein.Contains(similarityMatrix.Name))
                {
                    throw new ArgumentException("Inconsistent similarity matrix");
                }
            }
            else if (_alphabet is RnaAlphabet)
            {
                if (!similarityMatrixRNA.Contains(similarityMatrix.Name))
                {
                    throw new ArgumentException("Inconsistent similarity matrix");
                }
            }
            else
            {
                throw new ArgumentException("Invalid alphabet");
            }

            // Initialize parameters
            _kmerLength                       = kmerLength;
            _distanceFunctionName             = distanceFunctionName;
            _hierarchicalClusteringMethodName = hierarchicalClusteringMethodName;
            _profileAlignerName               = profileAlignerMethodName;
            _profileProfileFunctionName       = profileFunctionName;
            SimilarityMatrix                  = similarityMatrix;
            GapOpenCost                       = gapOpenPenalty;
            GapExtensionCost                  = gapExtendPenalty;

            MsaUtils.SetProfileItemSets(_alphabet);

            Performance.Snapshot("Start Aligning");

            // Work...
            Align(sequences);
        }
Example #6
0
        /// <summary>
        /// Sets up fields for the assembly process.
        /// </summary>
        protected void Initialize()
        {
            this._currentStep            = 0;
            this._progressTimer          = new Timer(ProgressTimerInterval);
            this._progressTimer.Elapsed += this.ProgressTimerElapsed;

            this._statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InitializingStarted, DateTime.Now);
            this.RaiseStatusEvent();

            // Reset parameters not set by user, based on sequenceReads
            if (this.AllowKmerLengthEstimation)
            {
                this._kmerLength = EstimateKmerLength(this._sequenceReads);
            }
            else
            {
                if (this._kmerLength <= 0)
                {
                    throw new InvalidOperationException(Properties.Resource.KmerLength);
                }

                try
                {
                    if (!Alphabets.CheckIsFromSameBase(this._sequenceReads.First().Alphabet, Alphabets.DNA))
                    {
                        throw new InvalidOperationException(Properties.Resource.CannotAssembleSequenceType);
                    }
                }
                catch (Exception e)
                {
                    if (e.InnerException != null && !string.IsNullOrEmpty(e.InnerException.Message))
                    {
                        throw e.InnerException;
                    }

                    throw;
                }
            }

            // TODO: Force an odd kmer length to avoid palindromes
            // Need to evaluate this more - for now rely on the user to
            // not pass in bad data.
            //if (_kmerLength % 2 == 0)
            //    _kmerLength++;

            // Enforce our boundaries (same as DeBruijnGraph code)
            _kmerLength = Math.Max(1, Math.Min(DeBruijnGraph.MaxKmerLength, _kmerLength));

            if (this.DanglingLinksThreshold == -1)
            {
                this.DanglingLinksThreshold = this._kmerLength + 1;
            }

            if (this.RedundantPathLengthThreshold == -1)
            {
                // Reference for default threshold for redundant path purger:
                // ABySS Release Notes 1.1.2 - "Pop bubbles shorter than N bp. The default is b=3*(k + 1)."
                this.RedundantPathLengthThreshold = 3 * (this._kmerLength + 1);
            }

            this.InitializeDefaultGraphModifiers();

            this._statusMessage = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InitializingEnded, DateTime.Now);
            this.RaiseStatusEvent();
        }
Example #7
0
        /// <summary>
        /// For optimal graph formation, k-mer length should not be less
        /// than half the length of the longest input sequence and
        /// cannot be more than the length of the shortest input sequence.
        /// Reference for estimating kmerlength from reads: Supplement material from
        /// publication "ABySS: A parallel assembler for short read sequence data".
        /// </summary>
        /// <param name="sequences">List of input sequences.</param>
        /// <returns>Estimated optimal kmer length.</returns>
        public static int EstimateKmerLength(IEnumerable <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (!Alphabets.CheckIsFromSameBase(sequences.First().Alphabet, Alphabets.DNA))
            {
                throw new InvalidOperationException(Properties.Resource.CannotAssembleSequenceType);
            }

            long minSeqLength = long.MaxValue, maxSeqLength = 0;

            // Get the min/max ranges for the sequences
            foreach (ISequence seq in sequences)
            {
                long seqCount = seq.Count;
                if (minSeqLength > seqCount)
                {
                    minSeqLength = seqCount;
                }
                if (maxSeqLength < seqCount)
                {
                    maxSeqLength = seqCount;
                }
            }

            // for optimal purpose, kmer length should be more than half of longest sequence
            float minLengthOfKmer = Math.Max(1, maxSeqLength / 2);
            float maxLengthOfKmer = minSeqLength;

            int kmerLength = minLengthOfKmer < maxLengthOfKmer
                                 ? (int)Math.Ceiling((minLengthOfKmer + maxLengthOfKmer) / 2)
                                 : (int)Math.Floor(maxLengthOfKmer);

            // Make the kmer odd to avoid palindromes.
            if (kmerLength % 2 == 0)
            {
                kmerLength++;
                if (kmerLength > maxLengthOfKmer)
                {
                    kmerLength -= 2;
                }
                if (kmerLength <= 0)
                {
                    kmerLength = 1;
                }
            }

            // Final sanity checks.
            if (maxLengthOfKmer < kmerLength)
            {
                throw new InvalidOperationException(Properties.Resource.InappropriateKmerLength);
            }
            if (kmerLength <= 0)
            {
                throw new InvalidOperationException(Properties.Resource.KmerLength);
            }

            // Bound to our max size based on data handling.
            return(kmerLength > DeBruijnGraph.MaxKmerLength ? DeBruijnGraph.MaxKmerLength : kmerLength);
        }
Example #8
0
        /// <summary>
        /// This method assigns the alphabet from the input sequences
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <param name="similarityMatrix">Matrix to use for similarity comparisons</param>
        /// <param name="fixSimilarityMatrixErrors">True to fix any similarity matrix issue related to the alphabet.</param>
        private void SetAlphabet(IList <ISequence> sequences, SimilarityMatrix similarityMatrix, bool fixSimilarityMatrixErrors)
        {
            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty input sequences");
            }

            // Validate data type
            _alphabet = Alphabets.GetAmbiguousAlphabet(sequences[0].Alphabet);
            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, _alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            SimilarityMatrix bestSimilarityMatrix = null;

            if (_alphabet is DnaAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousDna);
            }
            else if (_alphabet is RnaAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.AmbiguousRna);
            }
            else if (_alphabet is ProteinAlphabet)
            {
                bestSimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
            }

            // Check or assign the similarity matrix.
            if (similarityMatrix == null)
            {
                SimilarityMatrix = bestSimilarityMatrix;
                if (SimilarityMatrix == null)
                {
                    throw new ArgumentException("Unknown alphabet - could not choose SimilarityMatrix.");
                }
            }
            else
            {
                List <String> similarityMatrixDNA = new List <String> {
                    "AmbiguousDNA"
                };
                List <String> similarityMatrixRNA = new List <String> {
                    "AmbiguousRNA"
                };
                List <String> similarityMatrixProtein = new List <String> {
                    "BLOSUM45", "BLOSUM50", "BLOSUM62", "BLOSUM80", "BLOSUM90", "PAM250", "PAM30", "PAM70"
                };

                if (_alphabet is DnaAlphabet)
                {
                    if (!similarityMatrixDNA.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for DNA.");
                        }
                    }
                }
                else if (_alphabet is ProteinAlphabet)
                {
                    if (!similarityMatrixProtein.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for Protein.");
                        }
                    }
                }
                else if (_alphabet is RnaAlphabet)
                {
                    if (!similarityMatrixRNA.Contains(similarityMatrix.Name))
                    {
                        if (fixSimilarityMatrixErrors)
                        {
                            SimilarityMatrix = bestSimilarityMatrix;
                        }
                        else
                        {
                            throw new ArgumentException("Inappropriate Similarity Matrix for RNA.");
                        }
                    }
                }
                else
                {
                    throw new ArgumentException("Invalid alphabet");
                }
            }
        }
Example #9
0
        /// <summary>
        /// Method which performs the alignment work.
        /// </summary>
        /// <param name="sequence1">First sequence</param>
        /// <param name="sequence2">Second sequence</param>
        /// <param name="useAffineGapModel">True to use affine gap model (separate open vs. extension cost)</param>
        /// <returns></returns>
        private IList <IPairwiseSequenceAlignment> DoAlign(ISequence sequence1, ISequence sequence2, bool useAffineGapModel)
        {
            usingAffineGapModel = useAffineGapModel;
            if (sequence1 == null)
            {
                throw new ArgumentNullException("sequence1");
            }
            if (sequence2 == null)
            {
                throw new ArgumentNullException("sequence2");
            }

            if (!Alphabets.CheckIsFromSameBase(sequence1.Alphabet, sequence2.Alphabet))
            {
                Trace.Report(Properties.Resource.InputAlphabetsMismatch);
                throw new ArgumentException(Properties.Resource.InputAlphabetsMismatch);
            }

            if (SimilarityMatrix == null)
            {
                Trace.Report(Properties.Resource.SimilarityMatrixCannotBeNull);
                throw new ArgumentException(Properties.Resource.SimilarityMatrixCannotBeNull);
            }

            if (!SimilarityMatrix.ValidateSequence(sequence1))
            {
                Trace.Report(Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix);
                throw new ArgumentException(Properties.Resource.FirstInputSequenceMismatchSimilarityMatrix);
            }

            if (!SimilarityMatrix.ValidateSequence(sequence2))
            {
                Trace.Report(Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix);
                throw new ArgumentException(Properties.Resource.SecondInputSequenceMismatchSimilarityMatrix);
            }


            if (GapOpenCost > GapExtensionCost)
            {
                Trace.Report(Properties.Resource.GapOpenGreaterThanGapExtension);
                throw new ArgumentException(Properties.Resource.GapOpenGreaterThanGapExtension);
            }

            _sequence1 = sequence1;
            _sequence2 = sequence2;
            _gap       = Alphabets.CheckIsFromSameBase(Alphabets.Protein, sequence1.Alphabet) ? Alphabets.Protein.Gap : Alphabets.DNA.Gap;

            ReferenceSequence = GetByteArrayFromSequence(_sequence1);
            QuerySequence     = GetByteArrayFromSequence(_sequence2);

            // Assign consensus resolver if it was not assigned already.
            IAlphabet alphabet = sequence1.Alphabet;

            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(alphabet.HasAmbiguity ? alphabet : Alphabets.AmbiguousAlphabetMap[sequence1.Alphabet]);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = alphabet.HasAmbiguity ? alphabet : Alphabets.AmbiguousAlphabetMap[sequence1.Alphabet];
            }

            return(new List <IPairwiseSequenceAlignment> {
                Process()
            });
        }
Example #10
0
        /// <summary>
        /// For optimal graph formation, k-mer length should not be less
        /// than half the length of the longest input sequence and
        /// cannot be more than the length of the shortest input sequence.
        /// Reference for estimating kmerlength from reads: Supplement material from
        /// publication "ABySS: A parallel assembler for short read sequence data".
        /// </summary>
        /// <param name="sequences">List of input sequences.</param>
        /// <returns>Estimated optimal kmer length.</returns>
        public static int EstimateKmerLength(IEnumerable <ISequence> sequences)
        {
            // kmer length should be less than input sequence lengths
            long minSeqLength = long.MaxValue;
            long maxSeqLength = 0;

            float maxLengthOfKmer = int.MaxValue;
            float minLengthOfKmer = 0;
            int   kmerLength      = 0;

            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (!Alphabets.CheckIsFromSameBase(sequences.First().Alphabet, Alphabets.DNA))
            {
                throw new InvalidOperationException(Properties.Resource.CannotAssembleSequenceType);
            }

            foreach (ISequence seq in sequences)
            {
                long seqCount = seq.Count;
                if (minSeqLength > seqCount)
                {
                    // Get the min seq count to maxLength.
                    minSeqLength = seqCount;
                }

                if (maxSeqLength < seqCount)
                {
                    // Get the max sequence count to minLength.
                    maxSeqLength = seqCount;
                }
            }

            maxLengthOfKmer = minSeqLength;

            // for optimal purpose, kmer length should be more than half of longest sequence
            minLengthOfKmer = maxSeqLength / 2;

            if (minLengthOfKmer < maxLengthOfKmer)
            {
                // Choose median value between the end-points
                kmerLength = (int)Math.Ceiling((minLengthOfKmer + maxLengthOfKmer) / 2);
            }
            else
            {
                // In this case pick maxLength, since this is a hard limit
                kmerLength = (int)Math.Floor(maxLengthOfKmer);
            }

            if (maxLengthOfKmer < kmerLength)
            {
                throw new InvalidOperationException(Properties.Resource.InappropriateKmerLength);
            }

            if (kmerLength <= 0)
            {
                throw new InvalidOperationException(Properties.Resource.KmerLength);
            }

            // This needs to be modified when we have a structure which can hold kmerData of more than 32 symbols.
            return(kmerLength > 32 ? 32 : kmerLength);
        }
Example #11
0
        /// <summary>
        /// Main pregressive alignment algorithm aligns a set of sequences guided by
        /// a binary tree.
        /// </summary>
        /// <param name="sequences">input sequences</param>
        /// <param name="tree">a binary guide tree</param>
        public void Align(IList <ISequence> sequences, BinaryGuideTree tree)
        {
            SequenceWeighting sequenceWeighting = null;

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                sequenceWeighting = new SequenceWeighting(tree);

                /*
                 * for (int i = 0; i < sequenceWeighting.Weights.Length; ++i)
                 * {
                 *  sequenceWeighting.Weights[i] = 1;
                 * }
                 */
            }

            if (sequences.Count == 0)
            {
                throw new ArgumentException("Empty set of sequences");
            }
            IAlphabet alphabet = sequences[0].Alphabet;

            Parallel.For(1, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                if (!Alphabets.CheckIsFromSameBase(sequences[i].Alphabet, alphabet))
                {
                    throw new ArgumentException("Inconsistent sequence alphabet");
                }
            });

            if (PAMSAMMultipleSequenceAligner.UseWeights)
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i], sequenceWeighting.Weights[i]);
                    tree.Nodes[i].Weight           = sequenceWeighting.Weights[i];
                });
            }
            else
            {
                // Generate profile for leaf nodes
                Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
                {
                    tree.Nodes[i].ProfileAlignment = ProfileAlignment.GenerateProfileAlignment(sequences[i]);
                });
            }

            // Iterate internal nodes;
            // as defined in the tree, the last node is the root
            for (int i = sequences.Count; i < tree.Nodes.Count; ++i)
            {
                if (tree.Nodes[i].NeedReAlignment)
                {
                    // pull out its children
                    _nodeA = tree.Nodes[i].LeftChildren;
                    _nodeB = tree.Nodes[i].RightChildren;

                    if (PAMSAMMultipleSequenceAligner.UseWeights)
                    {
                        _profileAligner.Weights    = new float[2];
                        _profileAligner.Weights[0] = _nodeA.Weight;
                        _profileAligner.Weights[1] = _nodeB.Weight;

                        tree.Nodes[i].Weight = _nodeA.Weight + _nodeB.Weight;
                    }

                    // align two profiles
                    ProfileAlignment result = null;
                    if (_nodeA.ProfileAlignment.NumberOfSequences < _nodeB.ProfileAlignment.NumberOfSequences)
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeA.ProfileAlignment, _nodeB.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                    }
                    else
                    {
                        result = (ProfileAlignment)_profileAligner.Align(
                            _nodeB.ProfileAlignment, _nodeA.ProfileAlignment);
                        // assign aligned profiles to the current internal node
                        tree.Nodes[i].ProfileAlignment = result;

                        // generate eString for the children nodes
                        _nodeA.EString = _profileAligner.GenerateEString(_profileAligner.AlignedB);
                        _nodeB.EString = _profileAligner.GenerateEString(_profileAligner.AlignedA);
                    }


                    // children node profiles can be deleted
                    _nodeA.ProfileAlignment.Clear();
                    _nodeB.ProfileAlignment.Clear();
                }
            }

            // Convert original unaligned sequences to aligned ones by applying alignment paths in eStrings
            try
            {
                _alignedSequences = new List <ISequence>(sequences.Count);
            }
            catch (OutOfMemoryException ex)
            {
                throw new Exception("Out of memory", ex.InnerException);
            }

            for (int i = 0; i < sequences.Count; ++i)
            {
                _alignedSequences.Add(null);
            }

            Parallel.For(0, sequences.Count, PAMSAMMultipleSequenceAligner.parallelOption, i =>
            {
                ISequence seq = sequences[i];
                BinaryGuideTreeNode node;
                node = tree.Nodes[i];
                while (!node.IsRoot)
                {
                    seq  = _profileAligner.GenerateSequenceFromEString(node.EString, seq);
                    node = node.Parent;
                }
                _alignedSequences[i] = seq;
            });
        }