Пример #1
0
        /// <summary>
        /// Absorbs a higher-indexed contig into the contig under construction.
        /// Every assembled sequence of the consumed contig is re-created in the new
        /// contig with its orientation flags unchanged and its position shifted by
        /// the second item's alignment offset.
        /// </summary>
        /// <param name="newContig">Contig under construction that receives the sequences</param>
        /// <param name="globalBest">Winning score; its SecondOffset is added to each sequence position</param>
        /// <param name="consumedContig">Contig whose assembled sequences are being absorbed</param>
        private static void MergeHigherIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence source in consumedContig.Sequences)
            {
                // The higher-indexed item is never reversed or complemented by the
                // alignment, so the existing flags carry over untouched; only the
                // position moves, by the offset of the second merge item.
                Contig.AssembledSequence merged = new Contig.AssembledSequence
                {
                    IsReversed = source.IsReversed,
                    IsComplemented = source.IsComplemented,
                    Position = globalBest.SecondOffset + source.Position,
                    Sequence = SequenceWithoutTerminalGaps(source.Sequence)
                };

                newContig.Sequences.Add(merged);

                if (!Trace.Want(Trace.AssemblyDetails))
                {
                    continue;
                }

                ApplicationLog.WriteLine(
                    "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                    merged.IsReversed,
                    merged.IsComplemented,
                    merged.Position,
                    merged.Sequence);
            }
        }
Пример #2
0
        /// <summary>
        /// Builds the consensus of the passed contig and stores it in its Consensus property.
        /// Positions are visited from 0 upward; at each position the items of all
        /// sequences covering it are handed to the consensus resolver. The walk stops
        /// at the first position that no sequence covers, so a contig that has no
        /// sequence covering position 0 receives an empty consensus.
        /// </summary>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        private void MakeConsensus(Contig contig)
        {
            Sequence consensus = new Sequence(_sequenceAlphabet);
            List<ISequenceItem> column = new List<ISequenceItem>();

            // The contig length is not known up front, so keep advancing until a
            // position turns out to be covered by no sequence at all.
            for (int position = 0; ; position++)
            {
                column.Clear();

                foreach (Contig.AssembledSequence member in contig.Sequences)
                {
                    // Skip sequences that do not span the current position.
                    if (position < member.Position || position >= member.Position + member.Sequence.Count)
                    {
                        continue;
                    }

                    // A reversed sequence is read from its far end backwards.
                    int index = member.IsReversed
                        ? (member.Sequence.Count - 1) - (position - member.Position)
                        : position - member.Position;

                    ISequenceItem item = member.IsComplemented
                        ? member.Sequence.Complement[index]
                        : member.Sequence[index];

                    column.Add(item);
                }

                if (column.Count == 0)
                {
                    // Nothing covers this position: the consensus is complete.
                    contig.Consensus = consensus;
                    return;
                }

                consensus.Add(ConsensusResolver.GetConsensus(column));
            }
        }
Пример #3
0
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// Public overload that first installs the given alphabet (creating a
        /// SimpleConsensusResolver when no resolver is set, otherwise updating the
        /// existing resolver's alphabet) and then builds the consensus.
        /// Used by test automation to exercise consensus generation on its own.
        /// </summary>
        /// <param name="alphabet">Sequence alphabet</param>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        /// <exception cref="System.ArgumentNullException">Thrown when contig is null.</exception>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            // Validate before touching any state, so a bad call neither changes the
            // assembler's alphabet/resolver nor fails later with a NullReferenceException.
            if (contig == null)
            {
                throw new System.ArgumentNullException("contig");
            }

            _sequenceAlphabet = alphabet;
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
            }
            else
            {
                // Keep a caller-supplied resolver, but make it use the new alphabet.
                ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
            }

            MakeConsensus(contig);
        }
Пример #4
0
        /// <summary>
        /// Writes a description of the contig to the application log: a header
        /// with sequence count and length, the consensus, one line per assembled
        /// sequence, and a terminating blank line.
        /// </summary>
        /// <param name="contig">contig to be dumped</param>
        private static void Dump(Contig contig)
        {
            // Header and consensus lines.
            ApplicationLog.WriteLine(
                "contig has {0} seqs, length {1}",
                contig.Sequences.Count,
                contig.Length);
            ApplicationLog.WriteLine(
                "consensus: {0}",
                contig.Consensus);

            // One line per member sequence, showing orientation, position and content.
            foreach (Contig.AssembledSequence member in contig.Sequences)
            {
                ApplicationLog.WriteLine(
                    "seq (rev = {0} comp = {1} pos = {2}) {3}",
                    member.IsReversed,
                    member.IsComplemented,
                    member.Position,
                    member.Sequence);
            }

            // Blank line separates this dump from whatever is logged next.
            ApplicationLog.WriteLine(string.Empty);
        }
Пример #5
0
        /// <summary>
        /// Absorbs a higher-indexed single sequence into the contig under construction.
        /// The sequence is added unreversed and uncomplemented at the second item's
        /// alignment offset.
        /// </summary>
        /// <param name="newContig">Contig under construction that receives the sequence</param>
        /// <param name="globalBest">Winning score; its SecondOffset becomes the sequence position</param>
        /// <param name="consumedSequence">Sequence being absorbed</param>
        private static void MergeHigherIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // The higher-indexed item is never reversed or complemented by the
            // alignment, so both orientation flags are simply false.
            Contig.AssembledSequence merged = new Contig.AssembledSequence
            {
                IsReversed = false,
                IsComplemented = false,
                Position = globalBest.SecondOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(merged);

            if (!Trace.Want(Trace.AssemblyDetails))
            {
                return;
            }

            ApplicationLog.WriteLine(
                "seq (rev = {0} comp = {1} pos = {2}) {3}",
                merged.IsReversed,
                merged.IsComplemented,
                merged.Position,
                merged.Sequence);
        }
Пример #6
0
        /// <summary>
        /// Absorbs a lower-indexed single sequence into the contig under construction.
        /// Orientation flags and position are taken from the winning score: its
        /// Reversed / Complemented settings and its FirstOffset.
        /// </summary>
        /// <param name="newContig">Contig under construction that receives the sequence</param>
        /// <param name="globalBest">Winning score supplying orientation and FirstOffset</param>
        /// <param name="consumedSequence">Sequence being absorbed</param>
        private static void MergeLowerIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // Unlike the higher-indexed item, the lower-indexed one may have been
            // aligned reversed and/or complemented; the score records which.
            Contig.AssembledSequence merged = new Contig.AssembledSequence
            {
                IsReversed = globalBest.Reversed,
                IsComplemented = globalBest.Complemented,
                Position = globalBest.FirstOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(merged);

            if (!Trace.Want(Trace.AssemblyDetails))
            {
                return;
            }

            ApplicationLog.WriteLine(
                "seq (rev = {0} comp = {1} pos = {2}) {3}",
                merged.IsReversed,
                merged.IsComplemented,
                merged.Position,
                merged.Sequence);
        }
Пример #7
0
        /// <summary>
        /// Absorbs a lower-indexed contig into the contig under construction.
        /// Each assembled sequence of the consumed contig is re-created in the new
        /// contig with its orientation combined with the alignment's orientation
        /// and its position shifted by the first item's alignment offset.
        /// </summary>
        /// <param name="newContig">Contig under construction that receives the sequences</param>
        /// <param name="globalBest">Winning score supplying orientation and FirstOffset</param>
        /// <param name="consumedContig">Contig whose assembled sequences are being absorbed</param>
        private static void MergeLowerIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence source in consumedContig.Sequences)
            {
                // When the whole contig was aligned reversed, a member's distance from
                // the contig's right end becomes its distance from the left end.
                int offsetInContig = globalBest.Reversed
                    ? consumedContig.Length - (source.Sequence.Count + source.Position)
                    : source.Position;

                // Reversing twice (or complementing twice) cancels out, so the new flag
                // is set exactly when the member's flag and the alignment's flag differ.
                Contig.AssembledSequence merged = new Contig.AssembledSequence
                {
                    IsReversed = source.IsReversed != globalBest.Reversed,
                    IsComplemented = source.IsComplemented != globalBest.Complemented,
                    Position = globalBest.FirstOffset + offsetInContig,
                    Sequence = SequenceWithoutTerminalGaps(source.Sequence)
                };

                newContig.Sequences.Add(merged);

                if (!Trace.Want(Trace.AssemblyDetails))
                {
                    continue;
                }

                ApplicationLog.WriteLine(
                    "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                    merged.IsReversed,
                    merged.IsComplemented,
                    merged.Position,
                    merged.Sequence);
            }
        }
Пример #8
0
 /// <summary>
 /// Initializes a new instance of the PoolItem class from a contig.
 /// Delegates to the two-argument constructor, passing true as the second
 /// argument (presumably the flag marking the item as a contig; confirm
 /// against that overload).
 /// </summary>
 /// <param name="item">Contig to be wrapped by this pool item</param>
 internal PoolItem(Contig item)
     : this(item, true)
 {
 }
Пример #9
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. the "merge items" (can be seqs or contigs) are the
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        /// A merge is only attempted when a best-scoring pair was actually found, so an
        /// empty or single-element input is returned unmerged whatever the threshold is.
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        /// <exception cref="System.ArgumentNullException">Thrown when inputSequences is null.</exception>
        public IDeNovoAssembly Assemble(IList <ISequence> inputSequences)
        {
            if (inputSequences == null)
            {
                throw new System.ArgumentNullException("inputSequences");
            }

            // Initializations: the alphabet of the first sequence drives consensus resolution.
            if (inputSequences.Count > 0)
            {
                _sequenceAlphabet = inputSequences[0].Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List <PoolItem> pool = new List <PoolItem>();

            // put all the initial sequences into the pool.
            // there are no contigs in the pool yet.
            foreach (ISequence seq in inputSequences)
            {
                pool.Add(new PoolItem(seq));
            }

            // generate the pair scores; to save an iteration, we'll also find the
            // best global score as we go. globalBestLargerIndex stays -1 until some
            // pair beats the sentinel score.
            ItemScore globalBest            = new ItemScore(-1, -1, false, false, 0, 0);
            int       globalBestLargerIndex = -1;
            int       unconsumedCount       = inputSequences.Count;

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem  oldItem = pool[oldSeq];
                    ItemScore score   = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        globalBest            = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if a best pair exists and its score is above threshold,
            // and add new contig to pool. The index check keeps the sentinel (no pair
            // found) from being used to index the pool when the threshold is very low.
            if (globalBestLargerIndex >= 0 && globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig   newContig  = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                // At this point the pool holds only plain sequences, so no contig variants are needed.
                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);

                MakeConsensus(newContig);

                // Set ConsumedBy value and
                // free memory as these sequences are no longer used
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));

                // two items consumed, one contig added: net one fewer live item
                unconsumedCount--;

                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed item with the new contig,
                    // which is always the last entry of the pool.
                    int      newSeq  = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool,
                    // looking only at pairs where both items are still unconsumed.
                    globalBest            = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest            = new ItemScore(itemScore); // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBestLargerIndex >= 0 && globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig  = new Contig();

                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        // Set ConsumedBy value for these poolItems and
                        // free memory as these sequences are no longer used
                        mergeItem1.ConsumedBy = pool.Count;
                        mergeItem2.ConsumedBy = pool.Count;
                        mergeItem1.FreeSequences();
                        mergeItem2.FreeSequences();

                        pool.Add(new PoolItem(newContig));
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences from the items nobody consumed.
            OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return(sequenceAssembly);
        }