示例#1
0
        /// <summary>
        /// Merges the higher-indexed contig into the newly constructed contig.
        /// Every assembled sequence of the consumed contig is copied across with the
        /// orientation flags it already had, and its position shifted by the
        /// second offset recorded in the best score.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequences</param>
        /// <param name="globalBest">Best score; its SecondOffset is added to each position</param>
        /// <param name="consumedContig">Contig whose assembled sequences are merged</param>
        private static void MergeHigherIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence source in consumedContig.Sequences)
            {
                // The higher-indexed item is not re-oriented by the merge, so the flags are
                // carried over unchanged; only the position moves.
                Contig.AssembledSequence merged = new Contig.AssembledSequence
                {
                    IsReversed = source.IsReversed,
                    IsComplemented = source.IsComplemented,
                    Position = globalBest.SecondOffset + source.Position,
                    Sequence = SequenceWithoutTerminalGaps(source.Sequence)
                };

                newContig.Sequences.Add(merged);

                if (!Trace.Want(Trace.AssemblyDetails))
                {
                    continue;
                }

                ApplicationLog.WriteLine(
                    "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                    merged.IsReversed,
                    merged.IsComplemented,
                    merged.Position,
                    merged.Sequence);
            }
        }
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// The contig is walked column by column starting at position 0. For each column the
        /// symbol contributed by every assembled sequence covering that position is collected
        /// (honouring the sequence's reversed and complemented flags) and handed to the
        /// ConsensusResolver. The walk ends at the first position no assembled sequence covers.
        /// </summary>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        private void MakeConsensus(Contig contig)
        {
            // Snapshot the members so per-member state can be tracked by index.
            var members = contig.Sequences.ToList();

            // readable[i] is the sequence that is indexed for member i: the member's own sequence,
            // or - for complemented members - its complement. The complement is built once, on
            // first use (the slot is null until then), instead of once per consensus column.
            var readable = members.Select(member => member.IsComplemented ? null : member.Sequence).ToArray();

            List<byte> positionItems = new List<byte>();
            List<byte> consensusSequence = new List<byte>();

            // there's no simple way to pre-guess the length of the contig
            long position = 0;

            while (true)
            {
                positionItems.Clear();

                for (int i = 0; i < members.Count; i++)
                {
                    var aseq = members[i];
                    if (position < aseq.Position)
                    {
                        continue;
                    }

                    // Count property, not the LINQ Count() extension, which enumerates the sequence.
                    long length = aseq.Sequence.Count;
                    if (position >= aseq.Position + length)
                    {
                        continue;
                    }

                    if (readable[i] == null)
                    {
                        readable[i] = aseq.Sequence.GetComplementedSequence();
                    }

                    // A reversed member is read from its far end towards its start.
                    long offset = position - aseq.Position;
                    long seqPos = aseq.IsReversed ? (length - 1) - offset : offset;
                    positionItems.Add(readable[i][seqPos]);
                }

                if (positionItems.Count == 0)
                {
                    // This means no sequences at this position. We're done
                    contig.Consensus = new Sequence(Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet], consensusSequence.ToArray());
                    return;
                }

                consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
                position++;
            }
        }
示例#3
0
        /// <summary>
        /// Builds the consensus of the passed contig and stores it in its Consensus property.
        /// Positions are visited in increasing order from 0; at each one the items contributed by
        /// the assembled sequences covering it are resolved into a single consensus item.
        /// The first position that no assembled sequence covers ends the consensus.
        /// </summary>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        private void MakeConsensus(Contig contig)
        {
            List<ISequenceItem> column = new List<ISequenceItem>();
            Sequence consensus = new Sequence(_sequenceAlphabet);

            // The contig length is not known up front, so run until an empty column is found.
            for (int position = 0; ; position++)
            {
                column.Clear();

                foreach (Contig.AssembledSequence member in contig.Sequences)
                {
                    // Skip members that do not cover this position.
                    if (position < member.Position || position >= member.Position + member.Sequence.Count)
                    {
                        continue;
                    }

                    // A reversed member is read from its far end towards its start.
                    int offset = position - member.Position;
                    int seqPos = member.IsReversed ? (member.Sequence.Count - 1) - offset : offset;

                    column.Add(member.IsComplemented ? member.Sequence.Complement[seqPos] : member.Sequence[seqPos]);
                }

                if (column.Count == 0)
                {
                    // Nothing covers this position: the consensus is complete.
                    contig.Consensus = consensus;
                    return;
                }

                consensus.Add(ConsensusResolver.GetConsensus(column));
            }
        }
示例#4
0
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// The contig is walked column by column starting at position 0. For each column the
        /// symbol contributed by every assembled sequence covering that position is collected
        /// (honouring the sequence's reversed and complemented flags) and handed to the
        /// ConsensusResolver. The walk ends at the first position no assembled sequence covers.
        /// </summary>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        private void MakeConsensus(Contig contig)
        {
            // Snapshot the members so per-member state can be tracked by index.
            var members = contig.Sequences.ToList();

            // readable[i] is the sequence that is indexed for member i: the member's own sequence,
            // or - for complemented members - its complement. The complement is built once, on
            // first use (the slot is null until then), instead of once per consensus column.
            var readable = members.Select(member => member.IsComplemented ? null : member.Sequence).ToArray();

            List<byte> positionItems = new List<byte>();
            List<byte> consensusSequence = new List<byte>();

            // there's no simple way to pre-guess the length of the contig
            long position = 0;

            while (true)
            {
                // Initializations
                positionItems.Clear();

                for (int i = 0; i < members.Count; i++)
                {
                    var aseq = members[i];
                    if (position < aseq.Position)
                    {
                        continue;
                    }

                    // Count property, not the LINQ Count() extension, which enumerates the sequence.
                    long length = aseq.Sequence.Count;
                    if (position >= aseq.Position + length)
                    {
                        continue;
                    }

                    if (readable[i] == null)
                    {
                        readable[i] = aseq.Sequence.GetComplementedSequence();
                    }

                    // A reversed member is read from its far end towards its start.
                    long offset = position - aseq.Position;
                    long seqPos = aseq.IsReversed ? (length - 1) - offset : offset;
                    positionItems.Add(readable[i][seqPos]);
                }

                if (positionItems.Count == 0)
                {
                    // This means no sequences at this position. We're done
                    contig.Consensus = new Sequence(Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet], consensusSequence.ToArray());
                    return;
                }

                consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
                position++;
            }
        }
示例#5
0
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// Public method to allow testing of consensus generation part.
        /// Used by test automation.
        /// Records the alphabet on this instance, creates a SimpleConsensusResolver when no
        /// ConsensusResolver is set (otherwise updates the existing resolver's alphabet),
        /// then builds the consensus.
        /// </summary>
        /// <param name="alphabet">Sequence alphabet</param>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        /// <exception cref="ArgumentNullException">Thrown when contig is null</exception>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            // Fail fast, before the alphabet or resolver on this instance is changed;
            // a null contig would otherwise only surface as a NullReferenceException later.
            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }

            _sequenceAlphabet = alphabet;
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
            }

            MakeConsensus(contig);
        }
 /// <summary>
 /// Writes a contig to the stream the formatter already has open.
 /// </summary>
 /// <param name="formatter">Formatter</param>
 /// <param name="contig">Contig to write</param>
 /// <exception cref="ArgumentNullException">Thrown when formatter or contig is null</exception>
 /// <exception cref="InvalidOperationException">Thrown when the formatter has no open stream</exception>
 public static void Format(this XsvContigFormatter formatter, Contig contig)
 {
     if (formatter == null)
     {
         throw new ArgumentNullException("formatter");
     }

     if (contig == null)
     {
         throw new ArgumentNullException("contig");
     }

     var fs = ParserFormatterExtensions<ISequenceFormatter>.GetOpenStream(formatter, true);
     if (fs == null)
     {
         // The formatter is in the wrong state for this call; report that with a specific
         // exception type rather than the bare System.Exception.
         throw new InvalidOperationException("You must open a formatter before calling Write.");
     }

     formatter.Write(fs, contig);
 }
示例#7
0
        /// <summary>
        /// Logs a contig to the application log: a header line with the sequence count and
        /// length, the consensus, one line per assembled sequence, and a trailing empty line.
        /// </summary>
        /// <param name="contig">contig to be dumped</param>
        private static void Dump(Contig contig)
        {
            var members = contig.Sequences;

            ApplicationLog.WriteLine("contig has {0} seqs, length {1}", members.Count, contig.Length);
            ApplicationLog.WriteLine("consensus: {0}", contig.Consensus);

            foreach (Contig.AssembledSequence member in members)
            {
                ApplicationLog.WriteLine(
                    "seq (rev = {0} comp = {1} pos = {2}) {3}",
                    member.IsReversed,
                    member.IsComplemented,
                    member.Position,
                    member.Sequence);
            }

            // Blank line so consecutive dumps are visually separated in the log.
            ApplicationLog.WriteLine(string.Empty);
        }
        /// <summary>
        /// Writes a contig to the named file. The file is created with File.Create
        /// and closed again once the formatter has written the contig.
        /// </summary>
        /// <param name="formatter">Formatter</param>
        /// <param name="contig">Contig to write</param>
        /// <param name="filename">Filename</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when formatter or contig is null, or filename is null, empty or whitespace
        /// </exception>
        public static void Format(this XsvContigFormatter formatter, Contig contig, string filename)
        {
            if (null == formatter)
            {
                throw new ArgumentNullException("formatter");
            }

            if (null == contig)
            {
                throw new ArgumentNullException("contig");
            }

            bool filenameMissing = string.IsNullOrWhiteSpace(filename);
            if (filenameMissing)
            {
                throw new ArgumentNullException("filename");
            }

            // The using block disposes the file stream even if the write throws.
            using (var output = File.Create(filename))
            {
                formatter.Write(output, contig);
            }
        }
示例#9
0
        /// <summary>
        /// Merges the higher-indexed sequence into the newly constructed contig.
        /// The sequence is added unreversed and uncomplemented, at the second offset
        /// recorded in the best score, with its terminal gaps removed.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequence</param>
        /// <param name="globalBest">Best score; its SecondOffset becomes the position</param>
        /// <param name="consumedSequence">Consumed Sequence to be merged</param>
        private static void MergeHigherIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // The higher-indexed item keeps its original orientation, hence both flags are false.
            Contig.AssembledSequence merged = new Contig.AssembledSequence
            {
                IsReversed = false,
                IsComplemented = false,
                Position = globalBest.SecondOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(merged);

            if (!Trace.Want(Trace.AssemblyDetails))
            {
                return;
            }

            ApplicationLog.WriteLine(
                "seq (rev = {0} comp = {1} pos = {2}) {3}",
                merged.IsReversed,
                merged.IsComplemented,
                merged.Position,
                merged.Sequence);
        }
示例#10
0
        /// <summary>
        /// Merges the lower-indexed sequence into the newly constructed contig.
        /// The sequence is added with the reversed / complemented flags and the first
        /// offset recorded in the best score, with its terminal gaps removed.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequence</param>
        /// <param name="globalBest">Best score; supplies orientation flags and FirstOffset</param>
        /// <param name="consumedSequence">Consumed Sequence to be merged</param>
        private static void MergeLowerIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // The lower-indexed item may have been reversed or complemented for the alignment;
            // that information is retrieved from globalBest.
            Contig.AssembledSequence merged = new Contig.AssembledSequence
            {
                IsReversed = globalBest.Reversed,
                IsComplemented = globalBest.Complemented,
                Position = globalBest.FirstOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(merged);

            if (!Trace.Want(Trace.AssemblyDetails))
            {
                return;
            }

            ApplicationLog.WriteLine(
                "seq (rev = {0} comp = {1} pos = {2}) {3}",
                merged.IsReversed,
                merged.IsComplemented,
                merged.Position,
                merged.Sequence);
        }
示例#11
0
        /// <summary>
        /// Merges the lower-indexed contig into the newly constructed contig.
        /// Every assembled sequence of the consumed contig is copied across with its
        /// orientation flags combined with those of the best score and its position
        /// re-based on the best score's first offset.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequences</param>
        /// <param name="globalBest">Best Score along with offsets information</param>
        /// <param name="consumedContig">Contig to be merged</param>
        private static void MergeLowerIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence source in consumedContig.Sequences)
            {
                // When the contig is reverse-aligned, a member's distance from the contig's
                // right end becomes its distance from the left; otherwise the position is kept.
                long shift = globalBest.Reversed
                    ? consumedContig.Length - (source.Sequence.Count + source.Position)
                    : source.Position;

                Contig.AssembledSequence merged = new Contig.AssembledSequence
                {
                    // Reversing twice (or complementing twice) cancels out, so a flag is set
                    // only when exactly one of the two sources has it set.
                    IsReversed = source.IsReversed != globalBest.Reversed,
                    IsComplemented = source.IsComplemented != globalBest.Complemented,
                    Position = globalBest.FirstOffset + shift,
                    Sequence = SequenceWithoutTerminalGaps(source.Sequence)
                };

                newContig.Sequences.Add(merged);

                if (!Trace.Want(Trace.AssemblyDetails))
                {
                    continue;
                }

                ApplicationLog.WriteLine(
                    "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                    merged.IsReversed,
                    merged.IsComplemented,
                    merged.Position,
                    merged.Sequence);
            }
        }
示例#12
0
        /// <summary>
        /// Formats a (sparse) contig to a character-separated value file,
        /// writing the consensus first via the base formatter, followed by
        /// each assembled sequence in the order it appears in the contig.
        /// The offset written for an assembled sequence is read from that sequence's
        /// metadata under XsvSparseParser.MetadataOffsetKey.
        /// </summary>
        /// <param name="stream">Stream to write to; the writer opened here is created with leaveOpen set.</param>
        /// <param name="contig">The contig to format as a set of sparse sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when stream or contig is null</exception>
        public void Write(Stream stream, Contig contig)
        {
            if (null == stream)
            {
                throw new ArgumentNullException("stream");
            }

            if (null == contig)
            {
                throw new ArgumentNullException("contig");
            }

            // Consensus goes out first.
            base.Format(stream, contig.Consensus);

            // Then every assembled sequence, each with its own offset.
            using (StreamWriter writer = stream.OpenWrite(leaveOpen: true))
            {
                foreach (Contig.AssembledSequence member in contig.Sequences)
                {
                    // NOTE(review): the offset comes from the sequence metadata, not from
                    // member.Position - presumably the two are expected to agree; confirm.
                    var sequence = member.Sequence;
                    long offset = (long)sequence.Metadata[XsvSparseParser.MetadataOffsetKey];
                    this.Write(writer, sequence, offset);
                }
            }
        }
示例#13
0
        /// <summary>
        /// Merges the higher-indexed contig into the newly constructed contig.
        /// Each assembled sequence of the consumed contig is re-added with the same
        /// orientation flags and with its position moved by the best score's second offset.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequences</param>
        /// <param name="globalBest">Best score; its SecondOffset is added to each position</param>
        /// <param name="consumedContig">Contig whose assembled sequences are merged</param>
        private static void MergeHigherIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence member in consumedContig.Sequences)
            {
                // The merge does not re-orient the higher-indexed item: flags are copied
                // as they are and only the position is re-based.
                Contig.AssembledSequence placed = new Contig.AssembledSequence
                {
                    IsReversed = member.IsReversed,
                    IsComplemented = member.IsComplemented,
                    Position = globalBest.SecondOffset + member.Position,
                    Sequence = SequenceWithoutTerminalGaps(member.Sequence)
                };

                newContig.Sequences.Add(placed);

                bool wantDetails = Trace.Want(Trace.AssemblyDetails);
                if (wantDetails)
                {
                    ApplicationLog.WriteLine(
                        "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                        placed.IsReversed,
                        placed.IsComplemented,
                        placed.Position,
                        placed.Sequence);
                }
            }
        }
示例#14
0
        /// <summary>
        /// Merges the lower-indexed sequence into the newly constructed contig.
        /// The orientation flags and the position are taken from the best score
        /// (Reversed, Complemented, FirstOffset); terminal gaps are removed from the sequence.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequence</param>
        /// <param name="globalBest">Best score; supplies orientation flags and FirstOffset</param>
        /// <param name="consumedSequence">Consumed Sequence to be merged</param>
        private static void MergeLowerIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // Unlike the higher-indexed item, the lower-indexed one may have been reversed
            // or complemented; globalBest records which.
            Contig.AssembledSequence placed = new Contig.AssembledSequence
            {
                IsReversed = globalBest.Reversed,
                IsComplemented = globalBest.Complemented,
                Position = globalBest.FirstOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(placed);

            bool wantDetails = Trace.Want(Trace.AssemblyDetails);
            if (wantDetails)
            {
                ApplicationLog.WriteLine(
                    "seq (rev = {0} comp = {1} pos = {2}) {3}",
                    placed.IsReversed,
                    placed.IsComplemented,
                    placed.Position,
                    placed.Sequence);
            }
        }
示例#15
0
        /// <summary>
        /// Merges the lower-indexed contig into the newly constructed contig.
        /// For each assembled sequence of the consumed contig, the orientation flags are
        /// combined with those of the best score and the position is re-based on the
        /// best score's first offset before the sequence is added to the new contig.
        /// </summary>
        /// <param name="newContig">Contig that receives the merged sequences</param>
        /// <param name="globalBest">Best Score along with offsets information</param>
        /// <param name="consumedContig">Contig to be merged</param>
        private static void MergeLowerIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
        {
            foreach (Contig.AssembledSequence member in consumedContig.Sequences)
            {
                // For a reverse-aligned contig the member's gap to the right end of the
                // consumed contig is used in place of its position.
                long shift = globalBest.Reversed
                    ? consumedContig.Length - (member.Sequence.Count + member.Position)
                    : member.Position;

                Contig.AssembledSequence placed = new Contig.AssembledSequence
                {
                    // Applying the same operation twice is a no-op, so each flag ends up set
                    // only if exactly one of the member and the best score has it set.
                    IsReversed = member.IsReversed != globalBest.Reversed,
                    IsComplemented = member.IsComplemented != globalBest.Complemented,
                    Position = globalBest.FirstOffset + shift,
                    Sequence = SequenceWithoutTerminalGaps(member.Sequence)
                };

                newContig.Sequences.Add(placed);

                bool wantDetails = Trace.Want(Trace.AssemblyDetails);
                if (wantDetails)
                {
                    ApplicationLog.WriteLine(
                        "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
                        placed.IsReversed,
                        placed.IsComplemented,
                        placed.Position,
                        placed.Sequence);
                }
            }
        }
示例#16
0
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// Public method to allow testing of consensus generation part.
        /// Used by test automation.
        /// Records the alphabet on this instance, creates a SimpleConsensusResolver when no
        /// ConsensusResolver is set (otherwise updates the existing resolver's alphabet),
        /// then builds the consensus.
        /// </summary>
        /// <param name="alphabet">Sequence alphabet</param>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        /// <exception cref="ArgumentNullException">Thrown when contig is null</exception>
        public void MakeConsensus(IAlphabet alphabet, Contig contig)
        {
            // Fail fast, before the alphabet or resolver on this instance is changed;
            // a null contig would otherwise only surface as a NullReferenceException later.
            if (contig == null)
            {
                throw new ArgumentNullException("contig");
            }

            _sequenceAlphabet = alphabet;
            if (ConsensusResolver == null)
            {
                ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
            }
            else
            {
                ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
            }

            MakeConsensus(contig);
        }
示例#17
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs. 
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. the “merge items” (can be seqs or contigs) are the 
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of 
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
        {
            if (null == inputSequences)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
            }

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List<PoolItem> pool = inputSequences.Select(seq => new PoolItem(seq)).ToList();

            // Initialization
            int sequenceCount = pool.Count;
            if (sequenceCount > 0)
            {
                _sequenceAlphabet = pool[0].Sequence.Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            // put all the initial sequences into the pool, and generate the pair scores.
            // there are no contigs in the pool yet.
            // to save an iteration, we'll also find the best global score as we go.
            ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            int globalBestLargerIndex = -1;
            int unconsumedCount = sequenceCount;

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem oldItem = pool[oldSeq];
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        globalBest = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if best score is above threshold 
            // and add new contig to pool
            if (globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig newContig = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                MakeConsensus(newContig);

                // Set ConsumedBy value and 
                // free memory as these sequences are no longer used
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));
                unconsumedCount--;

                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed sequence with new contig
                    int newSeq = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool.
                    globalBest = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest = new ItemScore(itemScore);  // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig = new Contig();

                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        // Set ConsumedBy value for these poolItems and 
                        // free memory as these sequences are no longer used
                        mergeItem1.ConsumedBy = pool.Count;
                        mergeItem2.ConsumedBy = pool.Count;
                        mergeItem1.FreeSequences();
                        mergeItem2.FreeSequences();

                        pool.Add(new PoolItem(newContig));
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences
            OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return sequenceAssembly;
        }
示例#18
0
        /// <summary>
        /// Analyze the passed contig and store a consensus into its Consensus property.
        /// </summary>
        /// <remarks>
        /// Contig positions are visited from 0 upwards. At each position the symbols of every
        /// assembled sequence covering that position are collected (indexed from the far end
        /// for reversed sequences, and read from the complemented sequence for complemented
        /// ones) and passed to the ConsensusResolver. The walk ends at the first position
        /// that no assembled sequence covers.
        /// </remarks>
        /// <param name="contig">Contig for which consensus is to be constructed</param>
        private void MakeConsensus(Contig contig)
        {
            List<byte> positionItems = new List<byte>();
            List<byte> consensusSequence = new List<byte>();

            // One slot per assembled sequence, filled on first use, so that the complement
            // of a sequence is built at most once instead of once per covered position.
            ISequence[] complementedSequences = new ISequence[contig.Sequences.Count];

            // there's no simple way to pre-guess the length of the contig
            long position = 0;
            while (true)
            {
                // Start each position with an empty symbol list.
                positionItems.Clear();

                int sequenceIndex = -1;
                foreach (Contig.AssembledSequence aseq in contig.Sequences)
                {
                    sequenceIndex++;

                    // Use the Count property; the LINQ Count() extension would walk the
                    // whole sequence just to learn its length.
                    long sequenceLength = aseq.Sequence.Count;
                    if (position < aseq.Position || position >= aseq.Position + sequenceLength)
                    {
                        // This sequence does not cover the current contig position.
                        continue;
                    }

                    long offset = position - aseq.Position;
                    long seqPos = aseq.IsReversed ? (sequenceLength - 1) - offset : offset;

                    ISequence source = aseq.Sequence;
                    if (aseq.IsComplemented)
                    {
                        if (complementedSequences[sequenceIndex] == null)
                        {
                            complementedSequences[sequenceIndex] = aseq.Sequence.GetComplementedSequence();
                        }

                        source = complementedSequences[sequenceIndex];
                    }

                    positionItems.Add(source[seqPos]);
                }

                if (positionItems.Count == 0)
                {
                    // This means no sequences at this position. We're done
                    contig.Consensus = new Sequence(Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet], consensusSequence.ToArray());
                    return;
                }

                consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
                position++;
            }
        }
示例#19
0
        /// <summary>
        /// Adds the higher-indexed merge item, when it is a plain sequence, to the contig
        /// under construction. The sequence is passed through SequenceWithoutTerminalGaps
        /// and placed at the second offset of the winning score.
        /// </summary>
        /// <param name="newContig">Contig being built; receives the assembled sequence</param>
        /// <param name="globalBest">Winning score; its SecondOffset becomes the position</param>
        /// <param name="consumedSequence">Sequence consumed by this merge</param>
        private static void MergeHigherIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
        {
            // The higher-indexed item is never reversed or complemented.
            Contig.AssembledSequence placed = new Contig.AssembledSequence
            {
                IsReversed = false,
                IsComplemented = false,
                Position = globalBest.SecondOffset,
                Sequence = SequenceWithoutTerminalGaps(consumedSequence)
            };

            newContig.Sequences.Add(placed);

            if (!Trace.Want(Trace.AssemblyDetails))
            {
                return;
            }

            ApplicationLog.WriteLine(
                "seq (rev = {0} comp = {1} pos = {2}) {3}",
                placed.IsReversed,
                placed.IsComplemented,
                placed.Position,
                placed.Sequence);
        }
示例#20
0
        /// <summary>
        /// Writes a contig to the application log: a line with its sequence count and length,
        /// a line with its consensus, one line per assembled sequence, and a closing empty line.
        /// </summary>
        /// <param name="contig">contig to be dumped</param>
        private static void Dump(Contig contig)
        {
            const string SequenceLineFormat = "seq (rev = {0} comp = {1} pos = {2}) {3}";

            ApplicationLog.WriteLine("contig has {0} seqs, length {1}", contig.Sequences.Count, contig.Length);
            ApplicationLog.WriteLine("consensus: {0}", contig.Consensus);

            // One log line per member sequence of the contig.
            foreach (var member in contig.Sequences)
            {
                ApplicationLog.WriteLine(
                    SequenceLineFormat,
                    member.IsReversed,
                    member.IsComplemented,
                    member.Position,
                    member.Sequence);
            }

            // Blank separator line after the dump.
            ApplicationLog.WriteLine(string.Empty);
        }
示例#21
0
 /// <summary>
 /// Initializes a new instance of the PoolItem class for a contig.
 /// Delegates to the two-argument constructor, passing true as the second argument.
 /// </summary>
 /// <param name="item">Contig the new pool item is created for</param>
 internal PoolItem(Contig item)
     // NOTE(review): the second argument presumably flags the item as a contig - confirm against that overload.
     : this(item, true)
 {
 }
示例#22
0
 /// <summary>
 /// Initializes a new instance of the PoolItem class for a contig.
 /// Delegates to the two-argument constructor, passing true as the second argument.
 /// </summary>
 /// <param name="item">Contig the new pool item is created for</param>
 internal PoolItem(Contig item)
     // NOTE(review): the second argument presumably flags the item as a contig - confirm against that overload.
     : this(item, true)
 {
 }
示例#23
0
        /// <summary>
        /// Assemble the input sequences into the largest possible contigs.
        /// </summary>
        /// <remarks>
        /// The algorithm is:
        /// 1.  initialize list of contigs to empty list. List of seqs is passed as argument.
        /// 2.  compute pairwise overlap scores for each pair of input seqs (with reversal and
        ///     complementation as appropriate).
        /// 3.  choose best overlap score. the "merge items" (can be seqs or contigs) are the
        ///     items with that score. If best score is less than threshold, assembly is finished.
        /// 4.  merge the merge items into a single contig and remove them from their list(s)
        /// 5.  compute the overlap between new item and all existing items
        /// 6.  go to step 3
        /// The input is enumerated exactly once; all further work uses that snapshot.
        /// </remarks>
        /// <param name="inputSequences">The sequences to assemble.</param>
        /// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
        /// contigs and list of unmerged sequences which are result of this assembly.</returns>
        /// <exception cref="ArgumentNullException">Thrown when inputSequences is null.</exception>
        public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
        {
            if (null == inputSequences)
            {
                throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
            }

            // Snapshot the input so a lazy or one-shot enumerable is walked only once
            // and the alphabet, the pool and the counters all see the same items.
            List<ISequence> sequences = inputSequences.ToList();

            // Initializations
            if (sequences.Count > 0)
            {
                _sequenceAlphabet = sequences[0].Alphabet;

                if (ConsensusResolver == null)
                {
                    ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
                }
                else
                {
                    ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
                }
            }

            // numbering convention: every pool item (whether sequence or contig)
            // gets a fixed number.
            // sequence index = index into inputs (which we won't modify)
            // contig index = nSequences + index into contigs
            List<PoolItem> pool = new List<PoolItem>(sequences.Count);

            foreach (ISequence seq in sequences)
            {
                pool.Add(new PoolItem(seq));
            }

            // put all the initial sequences into the pool, and generate the pair scores.
            // there are no contigs in the pool yet.
            // to save an iteration, we'll also find the best global score as we go.
            ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            int globalBestLargerIndex = -1;
            int unconsumedCount = sequences.Count;

            // Compute alignment scores for all combinations between input sequences
            // Store these scores in the poolItem corresponding to each sequence
            for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
            {
                PoolItem newItem = pool[newSeq];
                for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
                {
                    PoolItem oldItem = pool[oldSeq];
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                    if (score.OverlapScore > globalBest.OverlapScore)
                    {
                        globalBest = new ItemScore(score);
                        globalBestLargerIndex = newSeq;
                    }
                }
            }

            // Merge sequence if best score is above threshold
            // and add new contig to pool.
            // globalBestLargerIndex stays -1 when no pair was selected (for example with
            // fewer than two inputs); in that case there is nothing to index in the pool.
            if (globalBestLargerIndex >= 0 && globalBest.OverlapScore >= MergeThreshold)
            {
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
                }

                PoolItem mergeItem1 = pool[globalBest.OtherItem];
                PoolItem mergeItem2 = pool[globalBestLargerIndex];
                Contig newContig = new Contig();
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    ApplicationLog.WriteLine(
                        "new pool item {0} will merge old items {1} and {2}",
                        pool.Count,
                        globalBest.OtherItem,
                        globalBestLargerIndex);
                }

                // Only input sequences are in the pool at this point, so both items are sequences.
                MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);

                MakeConsensus(newContig);

                // Two items are consumed and one contig is added: net one item fewer.
                AddMergedContigToPool(pool, mergeItem1, mergeItem2, newContig);
                unconsumedCount--;

                while (unconsumedCount > 1)
                {
                    // Compute scores for each unconsumed sequence with new contig
                    int newSeq = pool.Count - 1;
                    PoolItem newItem = pool[newSeq];
                    for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
                    {
                        PoolItem oldItem = pool[oldSeq];
                        if (oldItem.ConsumedBy >= 0)
                        {
                            // already consumed - just add dummy score to maintain correct indices
                            newItem.Scores.Add(new ItemScore());
                        }
                        else
                        {
                            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                            newItem.Scores.Add(score);
                        }
                    }

                    // find best global score in the modified pool.
                    globalBest = new ItemScore(-1, -1, false, false, 0, 0);
                    globalBestLargerIndex = -1;
                    for (int current = 0; current < pool.Count; ++current)
                    {
                        PoolItem curItem = pool[current];
                        if (curItem.ConsumedBy < 0)
                        {
                            for (int other = 0; other < current; ++other)
                            {
                                if (pool[other].ConsumedBy < 0)
                                {
                                    ItemScore itemScore = curItem.Scores[other];
                                    if (itemScore.OverlapScore > globalBest.OverlapScore)
                                    {
                                        globalBest = new ItemScore(itemScore); // copy the winner so far
                                        globalBestLargerIndex = current;
                                    }
                                }
                            }
                        }
                    }

                    if (globalBestLargerIndex >= 0 && globalBest.OverlapScore >= MergeThreshold)
                    {
                        // Merge sequences / contigs if above threshold
                        mergeItem1 = pool[globalBest.OtherItem];
                        mergeItem2 = pool[globalBestLargerIndex];
                        newContig = new Contig();

                        if (mergeItem1.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                                    globalBest.OtherItem,
                                    globalBest.Reversed,
                                    globalBest.Complemented,
                                    globalBest.FirstOffset);
                            }

                            MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                        }

                        if (mergeItem2.IsContig)
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a contig (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                        }
                        else
                        {
                            if (Trace.Want(Trace.AssemblyDetails))
                            {
                                ApplicationLog.WriteLine(
                                    "item {0} is a sequence (offset = {1}",
                                    globalBestLargerIndex,
                                    globalBest.SecondOffset);
                            }

                            MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                        }

                        MakeConsensus(newContig);
                        if (Trace.Want(Trace.AssemblyDetails))
                        {
                            Dump(newContig);
                        }

                        AddMergedContigToPool(pool, mergeItem1, mergeItem2, newContig);
                        unconsumedCount--;
                    }
                    else
                    {
                        // None of the alignment scores cross threshold
                        // No more merges possible. So end iteration.
                        break;
                    }
                }
            }

            // no further qualifying merges, so we're done.
            // populate contigs and unmergedSequences
            OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
            foreach (PoolItem curItem in pool)
            {
                if (curItem.ConsumedBy < 0)
                {
                    if (curItem.IsContig)
                    {
                        sequenceAssembly.Contigs.Add(curItem.Contig);
                    }
                    else
                    {
                        sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
                    }
                }
            }

            return sequenceAssembly;
        }

        /// <summary>
        /// Marks both merged pool items as consumed by the pool index the new contig is about
        /// to take, frees their sequences, and appends the new contig to the pool.
        /// </summary>
        /// <param name="pool">Pool of items being assembled</param>
        /// <param name="mergeItem1">Lower-indexed item consumed by the merge</param>
        /// <param name="mergeItem2">Higher-indexed item consumed by the merge</param>
        /// <param name="newContig">Contig produced by the merge</param>
        private static void AddMergedContigToPool(List<PoolItem> pool, PoolItem mergeItem1, PoolItem mergeItem2, Contig newContig)
        {
            // The contig is appended last, so its index is the current pool size.
            int newContigIndex = pool.Count;
            mergeItem1.ConsumedBy = newContigIndex;
            mergeItem2.ConsumedBy = newContigIndex;

            // free memory as these sequences are no longer used
            mergeItem1.FreeSequences();
            mergeItem2.FreeSequences();

            pool.Add(new PoolItem(newContig));
        }