Example #1
0
 /// <summary>
 /// Prepares the consensus resolver before sequences are aligned.
 /// The resolver's alphabet is the entry of <c>Alphabets.AmbiguousAlphabetMap</c>
 /// keyed by the input sequence's alphabet. A resolver is created when none
 /// is set; an existing resolver is re-pointed at that alphabet.
 /// </summary>
 /// <param name="inputSequence">Sequence whose alphabet selects the resolver alphabet.</param>
 private void InitializeAlign(ISequence inputSequence)
 {
     // A resolver already exists: keep it and only switch its alphabet.
     if (ConsensusResolver != null)
     {
         ConsensusResolver.SequenceAlphabet = Alphabets.AmbiguousAlphabetMap[inputSequence.Alphabet];
         return;
     }

     // No resolver yet: fall back to the simple resolver for the mapped alphabet.
     ConsensusResolver = new SimpleConsensusResolver(Alphabets.AmbiguousAlphabetMap[inputSequence.Alphabet]);
 }
Example #2
0
 /// <summary>
 /// Prepares the consensus resolver before sequences are aligned.
 /// The resolver is bound to the alphabet of the input sequence: it is
 /// created when none is set, otherwise the existing one is re-pointed.
 /// </summary>
 /// <param name="inputSequence">Sequence whose alphabet the resolver must use.</param>
 private void InitializeAlign(ISequence inputSequence)
 {
     // A resolver already exists: keep it and only switch its alphabet.
     if (ConsensusResolver != null)
     {
         ConsensusResolver.SequenceAlphabet = inputSequence.Alphabet;
         return;
     }

     // No resolver yet: fall back to the simple resolver for this alphabet.
     ConsensusResolver = new SimpleConsensusResolver(inputSequence.Alphabet);
 }
Example #3
0
        /// <summary>
        /// Configures the assembler for the given alphabet and then builds the
        /// consensus of the passed contig by delegating to the single-argument
        /// <c>MakeConsensus</c> overload.
        /// Public so that the consensus generation step can be exercised on its
        /// own by test automation.
        /// </summary>
        /// <param name="alphabet">Alphabet of the sequences contained in the contig.</param>
        /// <param name="contig">Contig for which the consensus is to be constructed.</param>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            // Remember the alphabet for the rest of the assembler.
            _sequenceAlphabet = alphabet;

            // Re-target an existing resolver, or create the default one.
            if (ConsensusResolver != null)
            {
                ConsensusResolver.SequenceAlphabet = alphabet;
            }
            else
            {
                ConsensusResolver = new SimpleConsensusResolver(alphabet);
            }

            MakeConsensus(contig);
        }
Example #4
0
        /// <summary>
        /// Main execute method of the aligner: binds the consensus resolver to
        /// the reference sequence's alphabet and then dispatches to the
        /// alignment routine selected by <c>StoreMUMs</c>.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence.</param>
        /// <param name="querySequenceList">List of query sequences to align against the reference.</param>
        /// <returns>A list of sequence alignments.</returns>
        private IList <IPairwiseSequenceAlignment> Alignment(
            ISequence referenceSequence,
            IList <ISequence> querySequenceList)
        {
            // Re-target an existing resolver, or create the default one.
            if (ConsensusResolver != null)
            {
                ConsensusResolver.SequenceAlphabet = referenceSequence.Alphabet;
            }
            else
            {
                ConsensusResolver = new SimpleConsensusResolver(referenceSequence.Alphabet);
            }

            // Pick the variant that matches the StoreMUMs setting.
            if (!StoreMUMs)
            {
                return AlignmentWithoutAccumulatedMUMs(referenceSequence, querySequenceList);
            }

            return AlignmentWithAccumulatedMUMs(referenceSequence, querySequenceList);
        }
Example #5
0
        /// <summary>
        /// Main execute method of the aligner. For every reference sequence a
        /// NUCmer instance is configured and run against every query sequence;
        /// the collected delta alignments are then grouped per query sequence,
        /// converted to pairwise aligned sequences, scored and given a consensus.
        /// </summary>
        /// <remarks>
        /// Both input enumerables are materialized once on entry. The method needs
        /// to walk each of them several times; re-enumerating a deferred source
        /// would repeat its work and could hand back different sequence instances,
        /// which would break the per-query matching of delta alignments.
        /// </remarks>
        /// <param name="referenceSequenceList">Reference sequences. Must contain at least one element.</param>
        /// <param name="querySequenceList">List of input (query) sequences.</param>
        /// <returns>
        /// One pairwise sequence alignment per query sequence, or an empty list
        /// when no delta alignment was found at all.
        /// </returns>
        private IEnumerable <IPairwiseSequenceAlignment> Alignment(IEnumerable <ISequence> referenceSequenceList, IEnumerable <ISequence> querySequenceList)
        {
            // Snapshot the references so that ElementAt / foreach / Count below all
            // see the same instances. The static type stays IEnumerable so that
            // calls such as ConcatSequence bind exactly as before.
            IEnumerable <ISequence> references = referenceSequenceList.ToList();

            // The consensus resolver always follows the first reference's alphabet.
            // ElementAt(0) throws for an empty reference list, as it did before.
            ConsensusResolver = new SimpleConsensusResolver(references.ElementAt(0).Alphabet);

            // Snapshot the queries as well: they are walked once per reference and
            // once more when the results are assembled.
            IEnumerable <ISequence> queries = querySequenceList.ToList();

            List <DeltaAlignment> deltas = new List <DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                // NUCmer is constructed from a Sequence, so every reference has to
                // be a Sequence instance; anything else fails this cast.
                this.nucmerAlgo = new NUCmer((Sequence)refSequence);

                // Forward only the settings that differ from their defaults, so
                // NUCmer keeps its own defaults otherwise.
                if (GapOpenCost != DefaultGapOpenCost)
                {
                    this.nucmerAlgo.GapOpenCost = GapOpenCost;
                }
                if (GapExtensionCost != DefaultGapExtensionCost)
                {
                    this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                }
                if (LengthOfMUM != DefaultLengthOfMUM)
                {
                    this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
                }

                // Same rule for the cluster builder and Smith-Waterman settings.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
                {
                    this.nucmerAlgo.FixedSeparation = FixedSeparation;
                }
                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
                {
                    this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                }
                if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
                {
                    this.nucmerAlgo.MinimumScore = MinimumScore;
                }
                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
                {
                    this.nucmerAlgo.SeparationFactor = SeparationFactor;
                }
                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
                {
                    this.nucmerAlgo.BreakLength = BreakLength;
                }

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null)
                {
                    this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
                }

                // Collect the delta alignments of every query against this reference.
                foreach (ISequence querySequence in queries)
                {
                    deltas.AddRange(this.nucmerAlgo.GetDeltaAlignments(querySequence));
                }
            }

            IList <IPairwiseSequenceAlignment> results = new List <IPairwiseSequenceAlignment>();

            // Without any delta alignment there is nothing to report.
            if (deltas.Count == 0)
            {
                return results;
            }

            // With several references the alignments are reported against their
            // concatenation; with a single reference against that reference itself.
            ISequence concatReference = references.ElementAt(0);
            if (references.Count() > 1)
            {
                concatReference = ConcatSequence(references);
            }

            foreach (ISequence querySequence in queries)
            {
                // Pick the deltas that belong to this query sequence.
                List <DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                // Convert delta alignments to sequence alignments
                IList <PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                foreach (PairwiseAlignedSequence align in alignments)
                {
                    // Calculate the score of alignment
                    align.Score = CalculateScore(
                        align.FirstSequence,
                        align.SecondSequence);

                    // Make Consensus
                    align.Consensus = MakeConsensus(
                        align.FirstSequence,
                        align.SecondSequence);

                    sequenceAlignment.PairwiseAlignedSequences.Add(align);
                }

                // A query without alignments still gets its (empty) result entry.
                results.Add(sequenceAlignment);
            }

            return results;
        }
Example #6
0
        /// <summary>
        /// Main execute method of the aligner. It defines the step by step
        /// algorithm: concatenate the references, build a suffix tree over the
        /// result, stream each query through the tree to find MUMs, cluster them,
        /// process the clusters into delta alignments and convert those into
        /// scored pairwise aligned sequences with a consensus.
        /// </summary>
        /// <param name="referenceSequenceList">List of reference sequences.</param>
        /// <param name="querySequenceList">List of input (query) sequences.</param>
        /// <returns>
        /// One sequence alignment per query sequence; a query that equals the
        /// reference sequence is skipped.
        /// </returns>
        private IList <IPairwiseSequenceAlignment> Alignment(
            IList <ISequence> referenceSequenceList,
            IList <ISequence> querySequenceList)
        {
            // Bind the consensus resolver to the first reference's alphabet,
            // creating the default resolver when none has been set.
            if (referenceSequenceList.Count > 0)
            {
                if (ConsensusResolver != null)
                {
                    ConsensusResolver.SequenceAlphabet = referenceSequenceList[0].Alphabet;
                }
                else
                {
                    ConsensusResolver = new SimpleConsensusResolver(referenceSequenceList[0].Alphabet);
                }
            }

            // NOTE(review): the reference list has already been dereferenced above
            // by the time the inputs are validated - confirm this order is intended.
            Validate(referenceSequenceList, querySequenceList);

            // Step 1: several references are concatenated into one sequence,
            // a single reference is used as it is.
            ISequence referenceSequence = referenceSequenceList.Count > 1
                ? ConcatSequence(referenceSequenceList)
                : referenceSequenceList[0];

            _referenceSequence = referenceSequence;

            // Step 2: build the suffix tree over the reference sequence.
            _suffixTree = BuildSuffixTree(_referenceSequence);

            IList <IPairwiseSequenceAlignment> results = new List <IPairwiseSequenceAlignment>();

            // Align every query sequence against the reference.
            foreach (ISequence sequence in querySequenceList)
            {
                // The reference is not aligned with itself.
                if (sequence.Equals(referenceSequence))
                {
                    continue;
                }

                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(referenceSequence, sequence);

                // Step 3: stream the query sequence through the suffix tree.
                _mumList = Streaming(_suffixTree, sequence, LengthOfMUM);

                if (_mumList.Count > 0)
                {
                    // Step 5: get the list of clusters.
                    _clusterList = GetClusters(_mumList);

                    // Step 7: process the clusters and get the delta alignments.
                    IList <DeltaAlignment> deltaAlignments = ProcessCluster(
                        referenceSequenceList,
                        _clusterList);

                    // Step 8: convert delta alignments to sequence alignments.
                    IList <PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(deltaAlignments);

                    foreach (PairwiseAlignedSequence align in alignments)
                    {
                        // Score the aligned pair.
                        align.Score = CalculateScore(
                            align.FirstSequence,
                            align.SecondSequence);

                        // Build its consensus.
                        align.Consensus = MakeConsensus(
                            align.FirstSequence,
                            align.SecondSequence);

                        sequenceAlignment.PairwiseAlignedSequences.Add(align);
                    }
                }

                // A query without MUMs still contributes its (empty) alignment.
                results.Add(sequenceAlignment);
            }

            return results;
        }
Example #7
0
        /// <summary>
        /// Sets up the assembler for the supplied alphabet and then generates the
        /// consensus of the passed contig through the single-argument
        /// <c>MakeConsensus</c> overload.
        /// Exposed publicly so test automation can exercise consensus generation
        /// in isolation.
        /// </summary>
        /// <param name="alphabet">Alphabet of the sequences contained in the contig.</param>
        /// <param name="contig">Contig for which the consensus is to be constructed.</param>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            // Keep the alphabet for later steps of the assembler.
            _sequenceAlphabet = alphabet;

            // Reuse the configured resolver when there is one; otherwise create
            // the default resolver for this alphabet.
            if (ConsensusResolver != null)
            {
                ConsensusResolver.SequenceAlphabet = alphabet;
            }
            else
            {
                ConsensusResolver = new SimpleConsensusResolver(alphabet);
            }

            MakeConsensus(contig);
        }
Example #8
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. the "merge items" (can be seqs or contigs) are the
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        ///
        /// Implementation notes:
        /// all items live in one pool list. Input sequences occupy the first
        /// positions, every new contig is appended at the end, and merged items are
        /// never removed but marked through their ConsumedBy value. The Scores list
        /// of a pool item is filled in pool order, so Scores[i] is the score against
        /// pool item i.
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="inputSequences"/> is null.</exception>
        public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
        {
            if (null == inputSequences)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
            }

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List<PoolItem> pool = inputSequences.Select(seq => new PoolItem(seq)).ToList();

            // Initialization: the alphabet of the first input sequence is used for the
            // whole assembly and for the consensus resolver. With no input sequences
            // the alphabet and the resolver are left untouched.
            int sequenceCount = pool.Count;
            if (sequenceCount > 0)
            {
                _sequenceAlphabet = pool[0].Sequence.Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            // put all the initial sequences into the pool, and generate the pair scores.
            // there are no contigs in the pool yet.
            // to save an iteration, we'll also find the best global score as we go.
            // globalBest starts as a sentinel score; globalBestLargerIndex is the pool
            // index of the higher-indexed item of the best pair (-1 while none is found).
            ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            int globalBestLargerIndex = -1;
            int unconsumedCount = sequenceCount;

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            // (each item is only scored against the items that precede it in the pool).
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem oldItem = pool[oldSeq];
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        // strict '>' keeps the first pair found among equal scores
                        globalBest = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if best score is above threshold
            // and add new contig to pool
            // NOTE(review): when no pair was scored (fewer than two inputs) globalBest is
            // still the sentinel; presumably its OverlapScore is below MergeThreshold -
            // confirm, otherwise the pool lookups below would use index -1.
            if (globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                // globalBest.OtherItem is used as the pool index of the lower-indexed
                // merge partner (presumably the oldSeq passed to AlignSequence - verify).
                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig newContig = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                // The pool holds only plain sequences at this point, so the first
                // merge always uses the sequence variants of the merge helpers.
                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                MakeConsensus(newContig);

                // Set ConsumedBy value and
                // free memory as these sequences are no longer used
                // (pool.Count is the index the new contig gets when it is added below).
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));

                // two items were consumed and one contig was added: net change is -1
                unconsumedCount--;

                // Keep merging while at least two unconsumed items remain.
                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed sequence with new contig
                    // (the new contig is always the last pool item)
                    int newSeq = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool.
                    // Only pairs in which both items are still unconsumed are considered.
                    globalBest = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest = new ItemScore(itemScore);  // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig = new Contig();

                        // Lower-indexed partner: choose the contig or the sequence merge
                        // helper depending on what the pool item holds.
                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        // Higher-indexed partner: same choice between contig and sequence.
                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        // Set ConsumedBy value for these poolItems and
                        // free memory as these sequences are no longer used
                        mergeItem1.ConsumedBy = pool.Count;
                        mergeItem2.ConsumedBy = pool.Count;
                        mergeItem1.FreeSequences();
                        mergeItem2.FreeSequences();

                        pool.Add(new PoolItem(newContig));

                        // again two items consumed, one contig added
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences
            // from the pool items that were never consumed by a merge.
            OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return sequenceAssembly;
        }
Example #9
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. the “merge items” (can be seqs or contigs) are the
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        public IDeNovoAssembly Assemble(IEnumerable <ISequence> inputSequences)
        {
            if (null == inputSequences)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
            }

            // Initializations
            if (inputSequences.Count() > 0)
            {
                _sequenceAlphabet = inputSequences.First().Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            OverlapDeNovoAssembly sequenceAssembly = null;

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List <PoolItem> pool = new List <PoolItem>();

            foreach (ISequence seq in inputSequences)
            {
                pool.Add(new PoolItem(seq));
            }

            // put all the initial sequences into the pool, and generate the pair scores.
            // there are no contigs in the pool yet.
            // to save an iteration, we'll also find the best global score as we go.
            ItemScore globalBest            = new ItemScore(-1, -1, false, false, 0, 0);
            int       globalBestLargerIndex = -1;
            int       unconsumedCount       = inputSequences.Count();

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem  oldItem = pool[oldSeq];
                    ItemScore score   = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        globalBest            = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if best score is above threshold
            // and add new contig to pool
            if (globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig   newContig  = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);

                MakeConsensus(newContig);

                // Set ConsumedBy value and
                // free memory as these sequences are no longer used
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));
                unconsumedCount--;

                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed sequence with new contig
                    globalBest            = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    int      newSeq  = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool.
                    globalBest            = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest            = new ItemScore(itemScore); // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig  = new Contig();

                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        // Set ConsumedBy value for these poolItems and
                        // free memory as these sequences are no longer used
                        mergeItem1.ConsumedBy = pool.Count;
                        mergeItem2.ConsumedBy = pool.Count;
                        mergeItem1.FreeSequences();
                        mergeItem2.FreeSequences();

                        pool.Add(new PoolItem(newContig));
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences
            sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return(sequenceAssembly);
        }
     /// <summary>
 /// Prepares the consensus resolver before sequences are aligned.
 /// The resolver is pointed at the ambiguous alphabet that
 /// Alphabets.AmbiguousAlphabetMap associates with the input sequence's alphabet;
 /// a new SimpleConsensusResolver is created only when none is set yet.
 /// </summary>
 /// <param name="inputSequence">Sequence whose alphabet selects the resolver alphabet.</param>
 private void InitializeAlign(ISequence inputSequence)
 {
     // Look the target alphabet up once; both paths below use the same value.
     var resolverAlphabet = Alphabets.AmbiguousAlphabetMap[inputSequence.Alphabet];

     if (ConsensusResolver != null)
     {
         // Reuse the existing resolver, just retarget its alphabet.
         ConsensusResolver.SequenceAlphabet = resolverAlphabet;
         return;
     }

     ConsensusResolver = new SimpleConsensusResolver(resolverAlphabet);
 }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// Nothing is returned; the outcome is written to AlignedSequences / AlignmentScore
        /// and to the per-stage AlignedSequencesA/B/C and AlignmentScoreA/B/C members.
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <exception cref="ArgumentException">
        /// ProfileAlignerName is neither NeedlemanWunschProfileAligner nor SmithWatermanProfileAligner.
        /// </exception>
        private void DoAlignment(IList<ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready: the serial implementation is used only when
            // degreeOfParallelism is exactly 1, the parallel one otherwise.
            IProfileAligner profileAligner = null;
            switch (ProfileAlignerName)
            {
                case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;
                case (ProfileAlignerNames.SmithWatermanProfileAligner):
                    if (this.degreeOfParallelism == 1)
                    {
                        profileAligner = new SmithWatermanProfileAlignerSerial(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new SmithWatermanProfileAlignerParallel(
                            SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                    }
                    break;
                default:
                    throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List<ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);
            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Faster version: stages 2 and 3 are skipped and simply mirror the stage 1 result.
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB = this.AlignmentScoreA;
                this.AlignmentScoreC = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree binaryGuideTreeB = null;
                IHierarchicalClustering hierarcicalClusteringB = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment.
                    // (Previously wrapped in a while(true) loop that always broke after one pass.)
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel
                            (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > this.AlignmentScoreB)
                    {
                        this.AlignmentScoreB = currentScore;
                        this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    // Stage 2 disabled: stage 3 refines against the stage 1 guide tree.
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                ReportLog("Stage 3");
                // refinement: at most one pass, and none at all when there are exactly two sequences
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;

                // Work on copies so refinement does not modify the sequences held in AlignedSequences.
                // Only the ID is carried over; the metadata dictionary is deliberately not shallow-copied.
                this.AlignedSequencesC = new List<ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                        {
                            ID = t.ID,
                        });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        List<int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        List<int>[] allIndelPositions = new List<int>[2];

                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        List<int>[] eStrings = new List<int>[2];

                        // The profile with fewer sequences is passed first; eStrings stays indexed
                        // by the original set number (0 or 1) regardless of the argument order.
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                // Keep a handle on the sequence being replaced so its ID can be
                                // transferred to the re-aligned sequence afterwards.
                                ISequence previous = this.AlignedSequencesC[i];
                                List<byte> seqBytes = new List<byte>();

                                // Copy every symbol except those at the positions listed
                                // (in ascending walk order) in allIndelPositions[set].
                                int indexAllIndel = 0;
                                for (int j = 0; j < previous.Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previous[j]);
                                    }
                                }

                                ISequence realigned = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                                // Bug fix: the ID used to be assigned from the already-replaced element
                                // (a self-assignment), so the original ID was never carried over.
                                // The metadata dictionary is still intentionally not shallow-copied.
                                realigned.ID = previous.ID;
                                this.AlignedSequencesC[i] = realigned;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                    (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        // No edge cut improved the score: stop refining.
                        refinementTime = maxRefineMentTime;
                        break;
                    }

                }
                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// Nothing is returned; the outcome is written to AlignedSequences / AlignmentScore
        /// and to the per-stage AlignedSequencesA/B/C and AlignmentScoreA/B/C members.
        /// </summary>
        /// <param name="sequences">Input sequences</param>
        /// <exception cref="ArgumentException">
        /// ProfileAlignerName is neither NeedlemanWunschProfileAligner nor SmithWatermanProfileAligner.
        /// </exception>
        private void DoAlignment(IList <ISequence> sequences)
        {
            Debug.Assert(this.alphabet != null);
            Debug.Assert(sequences.Count > 0);

            // Initializations
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(this.alphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = this.alphabet;
            }

            // Get ProfileAligner ready: the serial implementation is used only when
            // degreeOfParallelism is exactly 1, the parallel one otherwise.
            IProfileAligner profileAligner = null;

            switch (ProfileAlignerName)
            {
            case (ProfileAlignerNames.NeedlemanWunschProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new NeedlemanWunschProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new NeedlemanWunschProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            case (ProfileAlignerNames.SmithWatermanProfileAligner):
                if (this.degreeOfParallelism == 1)
                {
                    profileAligner = new SmithWatermanProfileAlignerSerial(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                else
                {
                    profileAligner = new SmithWatermanProfileAlignerParallel(
                        SimilarityMatrix, ProfileProfileFunctionName, GapOpenCost, GapExtensionCost, this.numberOfPartitions);
                }
                break;

            default:
                throw new ArgumentException("Invalid profile aligner name");
            }

            this.AlignedSequences = new List <ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            ReportLog("Stage 1");
            // Generate DistanceMatrix
            var kmerDistanceMatrixGenerator = new KmerDistanceMatrixGenerator(sequences, KmerLength, this.alphabet, DistanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel
                    (kmerDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

            // Generate Guide Tree
            var binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > this.AlignmentScoreA)
            {
                this.AlignmentScoreA   = currentScore;
                this.AlignedSequencesA = progressiveAlignerA.AlignedSequences;
            }
            if (this.AlignmentScoreA > this.AlignmentScore)
            {
                this.AlignmentScore   = this.AlignmentScoreA;
                this.AlignedSequences = this.AlignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Faster version: stages 2 and 3 are skipped and simply mirror the stage 1 result.
                this.AlignedSequencesB = this.AlignedSequencesA;
                this.AlignedSequencesC = this.AlignedSequencesA;
                this.AlignmentScoreB   = this.AlignmentScoreA;
                this.AlignmentScoreC   = this.AlignmentScoreA;
            }
            else
            {
                BinaryGuideTree               binaryGuideTreeB              = null;
                IHierarchicalClustering       hierarcicalClusteringB        = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (UseStageB)
                {
                    // STAGE 2
                    ReportLog("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment.
                    // (Previously wrapped in a while(true) loop that always broke after one pass.)
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                 (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > this.AlignmentScoreB)
                    {
                        this.AlignmentScoreB   = currentScore;
                        this.AlignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (this.AlignmentScoreB > this.AlignmentScore)
                    {
                        this.AlignmentScore   = this.AlignmentScoreB;
                        this.AlignedSequences = this.AlignedSequencesB;
                    }
                }
                else
                {
                    // Stage 2 disabled: stage 3 refines against the stage 1 guide tree.
                    binaryGuideTreeB = binaryGuideTree;
                }


                // STAGE 3
                ReportLog("Stage 3");
                // refinement: at most one pass, and none at all when there are exactly two sequences
                int maxRefineMentTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefineMentTime = 0;
                }

                int refinementTime = 0;

                // Work on copies so refinement does not modify the sequences held in AlignedSequences.
                // Only the ID is carried over; the metadata dictionary is deliberately not shallow-copied.
                this.AlignedSequencesC = new List <ISequence>(this.AlignedSequences.Count);
                foreach (ISequence t in this.AlignedSequences)
                {
                    this.AlignedSequencesC.Add(new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), t.ToArray())
                    {
                        ID = t.ID,
                    });
                }

                while (refinementTime < maxRefineMentTime)
                {
                    ++refinementTime;
                    ReportLog("Refinement iter " + refinementTime);
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        List <int>[] leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        List <int>[] allIndelPositions = new List <int> [2];

                        IProfileAlignment[] separatedProfileAlignments = ProfileAlignment.ProfileExtraction(this.AlignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        List <int>[]        eStrings = new List <int> [2];

                        // The profile with fewer sequences is passed first; eStrings stays indexed
                        // by the original set number (0 or 1) regardless of the argument order.
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], ParallelOption, i =>
                            {
                                // Keep a handle on the sequence being replaced so its ID can be
                                // transferred to the re-aligned sequence afterwards.
                                ISequence previous = this.AlignedSequencesC[i];
                                List <byte> seqBytes = new List <byte>();

                                // Copy every symbol except those at the positions listed
                                // (in ascending walk order) in allIndelPositions[set].
                                int indexAllIndel = 0;
                                for (int j = 0; j < previous.Count; ++j)
                                {
                                    if (indexAllIndel < allIndelPositions[set].Count && j == allIndelPositions[set][indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previous[j]);
                                    }
                                }

                                ISequence realigned = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(this.alphabet), seqBytes.ToArray()));

                                // Bug fix: the ID used to be assigned from the already-replaced element
                                // (a self-assignment), so the original ID was never carried over.
                                // The metadata dictionary is still intentionally not shallow-copied.
                                realigned.ID = previous.ID;
                                this.AlignedSequencesC[i] = realigned;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(this.AlignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > this.AlignmentScoreC)
                        {
                            this.AlignmentScoreC = currentScore;
                            needRefinement       = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(this.AlignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel
                                                         (kimuraDistanceMatrixGenerator.DistanceMatrix, HierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }
                    if (!needRefinement)
                    {
                        // No edge cut improved the score: stop refining.
                        refinementTime = maxRefineMentTime;
                        break;
                    }
                }
                if (this.AlignmentScoreC > this.AlignmentScore)
                {
                    this.AlignmentScore   = this.AlignmentScoreC;
                    this.AlignedSequences = this.AlignedSequencesC;
                }
                ReportLog("Stop Stage 3");
            }
        }
Example #13
0
        /// <summary>
        /// Generates consensus sequences from alignment layout.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Input list of reads.</param>
        /// <returns>List of contigs.</returns>
        public static IEnumerable <ISequence> GenerateConsensus(IEnumerable <DeltaAlignment> alignmentBetweenReferenceAndReads)
        {
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            SimpleConsensusResolver                resolver              = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance);
            Dictionary <long, Sequence>            outputSequences       = new Dictionary <long, Sequence>();
            Dictionary <DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary <DeltaAlignment, ISequence>();
            IEnumerator <DeltaAlignment>           deltaEnumerator       = alignmentBetweenReferenceAndReads.GetEnumerator();

            long           currentAlignmentStartOffset = 0;
            long           currentIndex = 0;
            long           inDeltaIndex = 0;
            DeltaAlignment lastDelta;

            List <byte>           currentContig  = new List <byte>();
            List <DeltaAlignment> deltasToRemove = new List <DeltaAlignment>();

            // no deltas
            if (!deltaEnumerator.MoveNext())
            {
                return(outputSequences.Values);
            }

            lastDelta = deltaEnumerator.Current;
            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // loop through all deltas at current index and find consensus
                do
                {
                    // Proceed creating consensus till we find another delta stats aligning
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, lastDelta.QuerySequence.GetSubSequence(lastDelta.SecondSequenceStart, (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                        // Get next delta
                        if (deltaEnumerator.MoveNext())
                        {
                            lastDelta = deltaEnumerator.Current;
                            continue; // see if new delta starts from the same offset
                        }
                        else
                        {
                            lastDelta = null;
                        }
                    }

                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int    symbolCounter         = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    if (deltasToRemove.Count > 0)
                    {
                        foreach (var deltaToRemove in deltasToRemove)
                        {
                            deltasInCurrentContig.Remove(deltaToRemove);
                        }

                        deltasToRemove.Clear();
                    }

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // See if another delta is adjacent
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, lastDelta.QuerySequence.GetSubSequence(lastDelta.SecondSequenceStart, (lastDelta.SecondSequenceEnd - lastDelta.SecondSequenceStart) + 1));

                        // check next delta
                        if (deltaEnumerator.MoveNext())
                        {
                            lastDelta = deltaEnumerator.Current;
                            continue; // read next delta to see if it starts from current reference sequence offset
                        }
                        else
                        {
                            lastDelta = null;
                        }
                    }
                }while (deltasInCurrentContig.Count > 0);

                outputSequences.Add(currentAlignmentStartOffset, new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false));
            }while (lastDelta != null);

            return(outputSequences.Values);
        }
        /// <summary>
        /// Main execution method defining the step by step algorithm: runs NUCmer for
        /// every reference/query pair, gathers the delta alignments, and converts them
        /// into one pairwise sequence alignment per query sequence.
        /// </summary>
        /// <remarks>
        /// Both inputs are copied into lists before use because they are walked several
        /// times (once per reference for the queries, and again while building results).
        /// Copying guarantees that a lazily produced input is evaluated only once and that
        /// the query instances matched against the deltas are the ones that were aligned.
        /// </remarks>
        /// <param name="referenceSequenceList">Reference sequence.</param>
        /// <param name="originalQuerySequences">List of input sequences.</param>
        /// <returns>A list of sequence alignment.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the shorter of a reference/query pair has fewer symbols than
        /// <c>MinimumScore</c>.
        /// </exception>
        private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
        {
            // Snapshot the references once; the original sequence was enumerated up to five times.
            List<ISequence> references = referenceSequenceList.ToList();

            ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

            IEnumerable<ISequence> querySource =
                ForwardOnly ? originalQuerySequences
                    : (ReverseOnly
                        ? ReverseComplementSequenceList(originalQuerySequences)
                        : AddReverseComplementsToSequenceList(originalQuerySequences));

            // Snapshot the queries as well: the same instances must be seen by the
            // alignment loop and by the per-query delta lookup further down.
            List<ISequence> querySequenceList = querySource.ToList();

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            var deltas = new List<DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                this.nucmerAlgo = new NUCmer(refSequence);

                // Only settings that differ from the default constants are pushed to NUCmer.
                if (GapOpenCost != DefaultGapOpenCost)
                {
                    this.nucmerAlgo.GapOpenCost = GapOpenCost;
                }

                if (GapExtensionCost != DefaultGapExtensionCost)
                {
                    this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                }

                if (LengthOfMUM != DefaultLengthOfMUM)
                {
                    this.nucmerAlgo.LengthOfMUM = LengthOfMUM;
                }

                // Same rule for the cluster builder / Smith-Waterman related settings.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation)
                {
                    this.nucmerAlgo.FixedSeparation = FixedSeparation;
                }

                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation)
                {
                    this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                }

                if (MinimumScore != ClusterBuilder.DefaultMinimumScore)
                {
                    this.nucmerAlgo.MinimumScore = MinimumScore;
                }

                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor)
                {
                    this.nucmerAlgo.SeparationFactor = SeparationFactor;
                }

                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength)
                {
                    this.nucmerAlgo.BreakLength = BreakLength;
                }

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null)
                {
                    this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;
                }

                foreach (ISequence querySequence in querySequenceList)
                {
                    //  Check for parameters that would prevent an alignment from being returned.
                    if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
                    {
                        var msg = "Bad parameter settings for NucmerPairwiseAligner. " +
                                  "Tried to align a reference of length " + refSequence.Count.ToString() +
                                  " to a sequence of length " + querySequence.Count.ToString() +
                                  " while requiring a minimum score of MinimumScore = " + MinimumScore +
                                  ". This will prevent any alignments from being returned.";
                        throw new ArgumentException(msg);
                    }

                    IEnumerable<DeltaAlignment> deltaAlignment = this.nucmerAlgo.GetDeltaAlignments(querySequence, !MaxMatch, querySequence.IsMarkedAsReverseComplement());
                    deltas.AddRange(deltaAlignment);
                }
            }

            if (deltas.Count > 0)
            {
                // With more than one reference, all of them are concatenated into a single
                // sequence that is used as the first sequence of every alignment.
                ISequence concatReference = references[0];
                if (references.Count > 1)
                {
                    concatReference = ConcatSequence(references);
                }

                foreach (ISequence querySequence in querySequenceList)
                {
                    List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                    IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                    // Convert delta alignments to sequence alignments
                    IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                    foreach (PairwiseAlignedSequence align in alignments)
                    {
                        // Calculate the score of alignment
                        align.Score = CalculateScore(
                            align.FirstSequence,
                            align.SecondSequence);

                        // Make Consensus
                        align.Consensus = MakeConsensus(
                            align.FirstSequence,
                            align.SecondSequence);

                        sequenceAlignment.PairwiseAlignedSequences.Add(align);
                    }

                    // An alignment object is recorded for every query, even when it has no aligned pairs.
                    results.Add(sequenceAlignment);
                }
            }

            return results;
        }
Example #15
0
        /// <summary>
        /// Generates consensus sequences from alignment layout.
        /// </summary>
        /// <remarks>
        /// The argument is validated when this method is called. The contigs themselves
        /// are produced lazily, while the returned sequence is being enumerated.
        /// </remarks>
        /// <param name="alignmentBetweenReferenceAndReads">Input list of reads.</param>
        /// <returns>List of contigs.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Validation lives outside the iterator so that it is not deferred
            // until the caller starts enumerating the result.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Lazily walks the deltas and yields one consensus sequence per contig, where a
        /// contig is a run of deltas that overlap or directly follow one another on the
        /// first (reference) sequence.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null collection of deltas.</param>
        /// <returns>Consensus sequence of each contig, in the order the contigs are found.</returns>
        private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // this dictionary will not grow more than a few hundred in worst scenario,
            // as this stores delta and its corresponding sequences
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            long currentAlignmentStartOffset = 0;
            long currentIndex = 0;

            List<byte> currentContig = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // no deltas
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long index = 0;

            // lastDelta is the next delta not yet merged into a contig; null once the collection is exhausted.
            // NOTE(review): the walk only picks up a delta when its start equals the current
            // reference offset, so the input is presumably ordered by FirstSequenceStart - confirm.
            DeltaAlignment lastDelta = alignmentBetweenReferenceAndReads[index];
            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // loop through all deltas at current index and find consensus
                do
                {
                    // Pull in every pending delta that starts exactly at the current reference offset.
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Get next delta
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }

                    // One symbol from every delta covering the current position.
                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int symbolCounter = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        long inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // Last symbol of this delta was just consumed; drop it after the enumeration.
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    // Removal is done outside the foreach because the dictionary cannot be modified while enumerated.
                    foreach (DeltaAlignment finishedDelta in deltasToRemove)
                    {
                        deltasInCurrentContig.Remove(finishedDelta);
                    }

                    deltasToRemove.Clear();

                    currentContig.Add(resolver.GetConsensus(symbolsAtCurrentIndex));

                    currentIndex++;

                    // See if another delta is adjacent: if so it extends this contig instead of starting a new one.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // check next delta
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }
                }
                while (deltasInCurrentContig.Count > 0);

                yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
            }
            while (lastDelta != null);
        }
Example #16
0
        /// <summary>
        /// Performs Stage 1, 2, and 3 as described in class description.
        /// </summary>
        /// <remarks>
        /// The outcome is written to the aligner's fields: the best alignment and score seen
        /// go to <c>_alignedSequences</c> / <c>_alignmentScore</c>, and the per-stage results
        /// to the A, B and C variants. The returned list is always empty.
        /// When <c>PAMSAMMultipleSequenceAligner.FasterVersion</c> is set, stages 2 and 3 are
        /// skipped and their fields mirror the stage 1 result.
        /// </remarks>
        /// <param name="inputSequences">Sequences to align.</param>
        /// <returns>An empty list, kept so pairwise and multiple aligners share the same output type.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the configured profile aligner name is not handled.
        /// </exception>
        public IList<Bio.Algorithms.Alignment.ISequenceAlignment> Align(IEnumerable<ISequence> inputSequences)
        {
            List<ISequence> sequences = inputSequences.ToList();

            // Initializations
            if (sequences.Count > 0)
            {
                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_alphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _alphabet;
                }
            }

            // Get ProfileAligner ready: serial implementation when the degree of
            // parallelism is 1, parallel implementation otherwise.
            IProfileAligner profileAligner = null;

            switch (_profileAlignerName)
            {
                case ProfileAlignerNames.NeedlemanWunschProfileAligner:
                    if (_degreeOfParallelism == 1)
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerSerial(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new NeedlemanWunschProfileAlignerParallel(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }

                    break;

                case ProfileAlignerNames.SmithWatermanProfileAligner:
                    if (_degreeOfParallelism == 1)
                    {
                        profileAligner = new SmithWatermanProfileAlignerSerial(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }
                    else
                    {
                        profileAligner = new SmithWatermanProfileAlignerParallel(
                            SimilarityMatrix, _profileProfileFunctionName, GapOpenCost, GapExtensionCost, _numberOfPartitions);
                    }

                    break;

                default:
                    throw new ArgumentException("Invalid profile aligner name");
            }

            _alignedSequences = new List<ISequence>(sequences.Count);
            float currentScore = 0;

            // STAGE 1

            Performance.Snapshot("Stage 1");

            // Generate DistanceMatrix
            KmerDistanceMatrixGenerator kmerDistanceMatrixGenerator =
                new KmerDistanceMatrixGenerator(sequences, _kmerLength, _alphabet, _distanceFunctionName);

            // Hierarchical clustering
            IHierarchicalClustering hierarcicalClustering =
                new HierarchicalClusteringParallel(
                    kmerDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

            // Generate Guide Tree
            BinaryGuideTree binaryGuideTree = new BinaryGuideTree(hierarcicalClustering);

            // Progressive Alignment
            IProgressiveAligner progressiveAlignerA = new ProgressiveAligner(profileAligner);

            progressiveAlignerA.Align(sequences, binaryGuideTree);

            currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerA.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);
            if (currentScore > _alignmentScoreA)
            {
                _alignmentScoreA = currentScore;
                _alignedSequencesA = progressiveAlignerA.AlignedSequences;
            }

            if (_alignmentScoreA > _alignmentScore)
            {
                _alignmentScore = _alignmentScoreA;
                _alignedSequences = _alignedSequencesA;
            }

            if (PAMSAMMultipleSequenceAligner.FasterVersion)
            {
                // Stages 2 and 3 are skipped; their results simply mirror stage 1.
                _alignedSequencesB = _alignedSequencesA;
                _alignedSequencesC = _alignedSequencesA;
                _alignmentScoreB = _alignmentScoreA;
                _alignmentScoreC = _alignmentScoreA;
            }
            else
            {
                BinaryGuideTree binaryGuideTreeB = null;
                IHierarchicalClustering hierarcicalClusteringB = null;
                KimuraDistanceMatrixGenerator kimuraDistanceMatrixGenerator = new KimuraDistanceMatrixGenerator();

                if (PAMSAMMultipleSequenceAligner.UseStageB)
                {
                    // STAGE 2
                    Performance.Snapshot("Stage 2");

                    // Generate DistanceMatrix from Multiple Sequence Alignment.
                    // This stage runs exactly once.
                    kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequences);

                    // Hierarchical clustering
                    hierarcicalClusteringB = new HierarchicalClusteringParallel(
                        kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                    // Generate Guide Tree
                    binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);

                    // The return value (if any) is not used here; the call is kept as-is.
                    BinaryGuideTree.CompareTwoTrees(binaryGuideTreeB, binaryGuideTree);
                    binaryGuideTree = binaryGuideTreeB;

                    // Progressive Alignment
                    IProgressiveAligner progressiveAlignerB = new ProgressiveAligner(profileAligner);
                    progressiveAlignerB.Align(sequences, binaryGuideTreeB);

                    currentScore = MsaUtils.MultipleAlignmentScoreFunction(progressiveAlignerB.AlignedSequences, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                    if (currentScore > _alignmentScoreB)
                    {
                        _alignmentScoreB = currentScore;
                        _alignedSequencesB = progressiveAlignerB.AlignedSequences;
                    }

                    if (_alignmentScoreB > _alignmentScore)
                    {
                        _alignmentScore = _alignmentScoreB;
                        _alignedSequences = _alignedSequencesB;
                    }
                }
                else
                {
                    binaryGuideTreeB = binaryGuideTree;
                }

                // STAGE 3
                Performance.Snapshot("Stage 3");

                // Refinement: at most one pass, and none at all for exactly two sequences.
                int maxRefinementTime = 1;
                if (sequences.Count == 2)
                {
                    maxRefinementTime = 0;
                }

                int refinementTime = 0;

                // Stage 3 works on copies of the best alignment found so far.
                // NOTE(review): this indexes _alignedSequences for every input sequence, which
                // presumes an earlier stage replaced the empty list created above - confirm.
                _alignedSequencesC = new List<ISequence>(sequences.Count);
                for (int i = 0; i < sequences.Count; ++i)
                {
                    _alignedSequencesC.Add(
                        new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), _alignedSequences[i].ToArray())
                        {
                            ID = _alignedSequences[i].ID,
                            Metadata = _alignedSequences[i].Metadata
                        });
                }

                List<int>[] leafNodeIndices = null;
                List<int>[] allIndelPositions = null;
                IProfileAlignment[] separatedProfileAlignments = null;
                List<int>[] eStrings = null;

                while (refinementTime < maxRefinementTime)
                {
                    ++refinementTime;
                    Performance.Snapshot("Refinement iter " + refinementTime.ToString());
                    bool needRefinement = false;
                    for (int edgeIndex = 0; edgeIndex < binaryGuideTreeB.NumberOfEdges; ++edgeIndex)
                    {
                        // Cut the tree at this edge and re-align the two resulting sequence groups.
                        leafNodeIndices = binaryGuideTreeB.SeparateSequencesByCuttingTree(edgeIndex);

                        separatedProfileAlignments = ProfileAlignment.ProfileExtraction(_alignedSequencesC, leafNodeIndices[0], leafNodeIndices[1], out allIndelPositions);
                        eStrings = new List<int>[2];

                        // The profile with fewer sequences is passed as the first argument;
                        // eStrings[k] always belongs to separatedProfileAlignments[k].
                        if (separatedProfileAlignments[0].NumberOfSequences < separatedProfileAlignments[1].NumberOfSequences)
                        {
                            profileAligner.Align(separatedProfileAlignments[0], separatedProfileAlignments[1]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedA);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedB);
                        }
                        else
                        {
                            profileAligner.Align(separatedProfileAlignments[1], separatedProfileAlignments[0]);
                            eStrings[0] = profileAligner.GenerateEString(profileAligner.AlignedB);
                            eStrings[1] = profileAligner.GenerateEString(profileAligner.AlignedA);
                        }

                        for (int set = 0; set < 2; ++set)
                        {
                            Parallel.ForEach(leafNodeIndices[set], PAMSAMMultipleSequenceAligner.parallelOption, i =>
                            {
                                // Keep a handle on the sequence being replaced: its ID and
                                // metadata must be carried over to the re-aligned sequence.
                                var previousSequence = _alignedSequencesC[i];
                                List<int> indelPositions = allIndelPositions[set];

                                // Copy every symbol except those at the listed indel positions.
                                List<byte> seqBytes = new List<byte>();
                                int indexAllIndel = 0;
                                for (int j = 0; j < previousSequence.Count; ++j)
                                {
                                    if (indexAllIndel < indelPositions.Count && j == indelPositions[indexAllIndel])
                                    {
                                        ++indexAllIndel;
                                    }
                                    else
                                    {
                                        seqBytes.Add(previousSequence[j]);
                                    }
                                }

                                _alignedSequencesC[i] = profileAligner.GenerateSequenceFromEString(eStrings[set], new Sequence(Alphabets.GetAmbiguousAlphabet(_alphabet), seqBytes.ToArray()));

                                // Previously these two lines read ID/Metadata back from the freshly
                                // generated sequence (a self-assignment), so nothing was carried over.
                                _alignedSequencesC[i].ID = previousSequence.ID;
                                (_alignedSequencesC[i] as Sequence).Metadata = previousSequence.Metadata;
                            });
                        }

                        currentScore = MsaUtils.MultipleAlignmentScoreFunction(_alignedSequencesC, SimilarityMatrix, GapOpenCost, GapExtensionCost);

                        if (currentScore > _alignmentScoreC)
                        {
                            _alignmentScoreC = currentScore;
                            needRefinement = true;

                            // recreate the tree
                            kimuraDistanceMatrixGenerator.GenerateDistanceMatrix(_alignedSequencesC);
                            hierarcicalClusteringB = new HierarchicalClusteringParallel(
                                kimuraDistanceMatrixGenerator.DistanceMatrix, _hierarchicalClusteringMethodName);

                            binaryGuideTreeB = new BinaryGuideTree(hierarcicalClusteringB);
                            break;
                        }
                    }

                    // No edge improved the score: stop refining.
                    if (!needRefinement)
                    {
                        refinementTime = maxRefinementTime;
                        break;
                    }
                }

                if (_alignmentScoreC > _alignmentScore)
                {
                    _alignmentScore = _alignmentScoreC;
                    _alignedSequences = _alignedSequencesC;
                }

                Performance.Snapshot("Stop Stage 3");
            }

            // just for the purpose of integrating PW and MSA with the same output
            IList<Bio.Algorithms.Alignment.ISequenceAlignment> results = new List<Bio.Algorithms.Alignment.ISequenceAlignment>();

            return results;
        }
Example #17
0
        /// <summary>
        /// Generates consensus sequences from alignment layout.
        /// </summary>
        /// <remarks>
        /// A null argument is rejected immediately, at call time. Building the contigs is
        /// deferred until the returned sequence is enumerated.
        /// </remarks>
        /// <param name="alignmentBetweenReferenceAndReads">Input list of reads.</param>
        /// <returns>List of contigs.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="alignmentBetweenReferenceAndReads"/> is null.
        /// </exception>
        public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            // Kept out of the iterator below: inside an iterator this check would only
            // run on the first MoveNext, not when GenerateConsensus is called.
            if (alignmentBetweenReferenceAndReads == null)
            {
                throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
            }

            return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
        }

        /// <summary>
        /// Iterator behind <c>GenerateConsensus</c>: groups deltas that overlap or directly
        /// follow each other on the first (reference) sequence into contigs and yields the
        /// consensus of each contig.
        /// </summary>
        /// <param name="alignmentBetweenReferenceAndReads">Non-null collection of deltas.</param>
        /// <returns>One consensus sequence per contig.</returns>
        private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
        {
            SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

            // this dictionary will not grow more than a few hundred in worst scenario,
            // as this stores delta and its corresponding sequences
            Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();

            long currentAlignmentStartOffset = 0;
            long currentIndex = 0;
            long inDeltaIndex = 0;
            DeltaAlignment lastDelta;

            List<byte> currentContig = new List<byte>();
            List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

            // no deltas
            if (alignmentBetweenReferenceAndReads.Count == 0)
            {
                yield break;
            }

            long index = 0;

            // lastDelta holds the next delta that has not been merged yet (null when none is left).
            // NOTE(review): deltas are only merged when they start exactly at the current
            // reference offset, so the collection is presumably sorted by FirstSequenceStart - confirm.
            lastDelta = alignmentBetweenReferenceAndReads[index];
            do
            {
                // Starting a new contig
                if (deltasInCurrentContig.Count == 0)
                {
                    currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
                    currentIndex = 0;
                    currentContig.Clear();
                }

                // loop through all deltas at current index and find consensus
                do
                {
                    // Merge every pending delta whose start coincides with the current reference offset.
                    while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // Get next delta
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }

                    // Collect the symbol each active delta contributes at this position.
                    byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
                    int symbolCounter = 0;

                    foreach (var delta in deltasInCurrentContig)
                    {
                        inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                        symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                        // This delta has no symbols left after the current one.
                        if (inDeltaIndex == delta.Value.Count - 1)
                        {
                            deltasToRemove.Add(delta.Key);
                        }
                    }

                    // Exhausted deltas are removed after the foreach, since the dictionary
                    // must not change while it is being enumerated.
                    foreach (DeltaAlignment exhaustedDelta in deltasToRemove)
                    {
                        deltasInCurrentContig.Remove(exhaustedDelta);
                    }

                    deltasToRemove.Clear();

                    byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
                    currentContig.Add(consensusSymbol);

                    currentIndex++;

                    // See if another delta is adjacent; if it is, the contig keeps growing.
                    if (deltasInCurrentContig.Count == 0 && lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
                    {
                        deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                        // check next delta
                        index++;
                        lastDelta = alignmentBetweenReferenceAndReads.Count > index
                            ? alignmentBetweenReferenceAndReads[index]
                            : null;
                    }
                }
                while (deltasInCurrentContig.Count > 0);

                yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
            }
            while (lastDelta != null);
        }
Example #18
0
        /// <summary>
        /// Main execution method of the aligner. Runs NUCmer for every
        /// reference/query pair to collect delta alignments, then converts the
        /// deltas of each query sequence into pairwise aligned sequences with a
        /// score and a consensus.
        /// </summary>
        /// <param name="referenceSequenceList">Reference sequences; must contain at least one element.</param>
        /// <param name="originalQuerySequences">List of input sequences.</param>
        /// <returns>
        /// One sequence alignment per query sequence that was searched, or an
        /// empty list when no delta alignment was produced at all.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when a reference or query sequence is shorter than <c>MinimumScore</c>,
        /// since no alignment could ever be returned with such settings.
        /// </exception>
        private IEnumerable<IPairwiseSequenceAlignment> Alignment(IEnumerable<ISequence> referenceSequenceList, IEnumerable<ISequence> originalQuerySequences)
        {
            // Materialize the references once: they are indexed, iterated, counted and
            // concatenated below, and the incoming enumerable may be lazy or expensive.
            List<ISequence> references = referenceSequenceList.ToList();

            ConsensusResolver = new SimpleConsensusResolver(references[0].Alphabet);

            IEnumerable<ISequence> queriesToAlign =
                ForwardOnly ? originalQuerySequences
                    : (ReverseOnly
                        ? ReverseComplementSequenceList(originalQuerySequences)
                        : AddReverseComplementsToSequenceList(originalQuerySequences));

            // Materialize the queries as well. The list is walked once per reference and
            // once more when deltas are matched back to their query via Equals; if the
            // reverse-complement helpers are lazy, re-enumerating could yield different
            // sequence instances than the ones stored in the deltas.
            List<ISequence> querySequenceList = queriesToAlign.ToList();

            IList<IPairwiseSequenceAlignment> results = new List<IPairwiseSequenceAlignment>();

            var deltas = new List<DeltaAlignment>();

            foreach (ISequence refSequence in references)
            {
                this.nucmerAlgo = new NUCmer(refSequence);

                // Forward only the settings that differ from the defaults, so NUCmer
                // keeps its own default for everything the caller did not change.
                if (GapOpenCost != DefaultGapOpenCost) this.nucmerAlgo.GapOpenCost = GapOpenCost;
                if (GapExtensionCost != DefaultGapExtensionCost) this.nucmerAlgo.GapExtensionCost = GapExtensionCost;
                if (LengthOfMUM != DefaultLengthOfMUM) this.nucmerAlgo.LengthOfMUM = LengthOfMUM;

                // Same rule for the cluster-builder and break-length settings.
                if (FixedSeparation != ClusterBuilder.DefaultFixedSeparation) this.nucmerAlgo.FixedSeparation = FixedSeparation;
                if (MaximumSeparation != ClusterBuilder.DefaultMaximumSeparation) this.nucmerAlgo.MaximumSeparation = MaximumSeparation;
                if (MinimumScore != ClusterBuilder.DefaultMinimumScore) this.nucmerAlgo.MinimumScore = MinimumScore;
                if (SeparationFactor != ClusterBuilder.DefaultSeparationFactor) this.nucmerAlgo.SeparationFactor = SeparationFactor;
                if (BreakLength != ModifiedSmithWaterman.DefaultBreakLength) this.nucmerAlgo.BreakLength = BreakLength;

                this.nucmerAlgo.ConsensusResolver = ConsensusResolver;
                if (SimilarityMatrix != null) this.nucmerAlgo.SimilarityMatrix = SimilarityMatrix;

                foreach (ISequence querySequence in querySequenceList)
                {
                    // Check for parameters that would prevent an alignment from being returned.
                    if (Math.Min(querySequence.Count, refSequence.Count) < MinimumScore)
                    {
                        var msg = "Bad parameter settings for NucmerPairwiseAligner. " +
                                   "Tried to align a reference of length " + refSequence.Count.ToString() +
                                   " to a sequence of length " + querySequence.Count.ToString() +
                                   " while requiring a minimum score of MinimumScore = " + MinimumScore +
                                   ". This will prevent any alignments from being returned.";
                        throw new ArgumentException(msg);
                    }

                    IEnumerable<DeltaAlignment> deltaAlignment = this.nucmerAlgo.GetDeltaAlignments(querySequence, !MaxMatch, querySequence.IsMarkedAsReverseComplement());
                    deltas.AddRange(deltaAlignment);
                }
            }

            if (deltas.Count == 0)
            {
                // Nothing aligned: return the empty result list.
                return results;
            }

            // With several references, report alignments against one concatenated sequence.
            ISequence concatReference = references.Count > 1
                ? ConcatSequence(references)
                : references[0];

            foreach (ISequence querySequence in querySequenceList)
            {
                List<DeltaAlignment> qDelta = deltas.Where(d => d.QuerySequence.Equals(querySequence)).ToList();
                IPairwiseSequenceAlignment sequenceAlignment = new PairwiseSequenceAlignment(concatReference, querySequence);

                // Convert delta alignments to sequence alignments
                IList<PairwiseAlignedSequence> alignments = ConvertDeltaToAlignment(qDelta);

                foreach (PairwiseAlignedSequence align in alignments)
                {
                    // Calculate the score of alignment
                    align.Score = CalculateScore(
                            align.FirstSequence,
                            align.SecondSequence);

                    // Make Consensus
                    align.Consensus = MakeConsensus(
                            align.FirstSequence,
                            align.SecondSequence);

                    sequenceAlignment.PairwiseAlignedSequences.Add(align);
                }

                // A query without any aligned sequence still gets an (empty) entry.
                results.Add(sequenceAlignment);
            }

            return results;
        }