/// <summary>
/// Merges the higher-indexed pool item, when that item is a contig, into the contig being built.
/// Each assembled sequence of the consumed contig is copied into the new contig with its
/// position shifted by the second offset of the winning alignment.
/// </summary>
/// <param name="newContig">Contig under construction; receives the copied sequences.</param>
/// <param name="globalBest">Winning alignment score; its SecondOffset supplies the shift.</param>
/// <param name="consumedContig">Contig whose assembled sequences are merged in.</param>
private static void MergeHigherIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
{
    foreach (Contig.AssembledSequence source in consumedContig.Sequences)
    {
        // The higher-indexed item is merged in its existing orientation, so the
        // reverse/complement flags are carried over untouched; only the position moves.
        Contig.AssembledSequence merged = new Contig.AssembledSequence
        {
            IsReversed = source.IsReversed,
            IsComplemented = source.IsComplemented,
            Position = globalBest.SecondOffset + source.Position,
            Sequence = SequenceWithoutTerminalGaps(source.Sequence)
        };

        newContig.Sequences.Add(merged);

        if (!Trace.Want(Trace.AssemblyDetails))
        {
            continue;
        }

        ApplicationLog.WriteLine(
            "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
            merged.IsReversed,
            merged.IsComplemented,
            merged.Position,
            merged.Sequence);
    }
}
/// <summary>
/// Analyze the passed contig and store a consensus into its Consensus property.
/// The contig is walked one position at a time starting at 0; at each position the symbols
/// of every assembled sequence covering that position are collected (honouring the
/// sequence's reversed / complemented flags) and resolved to one consensus symbol.
/// The walk stops at the first position that no sequence covers.
/// </summary>
/// <param name="contig">Contig for which consensus is to be constructed</param>
private void MakeConsensus(Contig contig)
{
    List<byte> positionItems = new List<byte>(), consensusSequence = new List<byte>();

    // Pair each assembled sequence with a lazily built complemented copy, so the complement
    // is constructed at most once per sequence instead of once per consensus position.
    var strands = contig.Sequences
        .Select(aseq => new
        {
            Assembled = aseq,
            Complement = new Lazy<ISequence>(() => aseq.Sequence.GetComplementedSequence())
        })
        .ToList();

    // there's no simple way to pre-guess the length of the contig
    long position = 0;
    while (true)
    {
        positionItems.Clear();

        // Collect the symbol contributed by every sequence that covers this position.
        // The Count property is used throughout (not the LINQ Count() extension, which
        // would enumerate the sequence).
        positionItems.AddRange(
            from strand in strands
            let aseq = strand.Assembled
            where position >= aseq.Position && position < aseq.Position + aseq.Sequence.Count
            let seqPos = aseq.IsReversed
                ? (aseq.Sequence.Count - 1) - (position - aseq.Position)
                : position - aseq.Position
            select aseq.IsComplemented ? strand.Complement.Value[seqPos] : aseq.Sequence[seqPos]);

        if (positionItems.Count == 0)
        {
            // This means no sequences at this position. We're done
            contig.Consensus = new Sequence(
                Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet],
                consensusSequence.ToArray());
            return;
        }

        consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
        position++;
    }
}
/// <summary>
/// Builds the consensus for the given contig and assigns it to the contig's Consensus property.
/// Positions are visited from 0 upwards; for each one the items of all assembled sequences
/// covering it are gathered and handed to the consensus resolver. The first position that
/// no sequence covers ends the walk.
/// </summary>
/// <param name="contig">Contig for which consensus is to be constructed</param>
private void MakeConsensus(Contig contig)
{
    Sequence consensus = new Sequence(_sequenceAlphabet);
    List<ISequenceItem> column = new List<ISequenceItem>();

    // The contig length is not known up front, so iterate until an empty column is found.
    for (int position = 0; ; position++)
    {
        column.Clear();

        foreach (Contig.AssembledSequence member in contig.Sequences)
        {
            // Skip sequences that do not cover the current position.
            if (position < member.Position || position >= member.Position + member.Sequence.Count)
            {
                continue;
            }

            // A reversed sequence is read from its far end.
            int index = member.IsReversed
                ? (member.Sequence.Count - 1) - (position - member.Position)
                : position - member.Position;

            column.Add(member.IsComplemented
                ? member.Sequence.Complement[index]
                : member.Sequence[index]);
        }

        if (column.Count == 0)
        {
            // Nothing covers this position: the consensus is complete.
            contig.Consensus = consensus;
            return;
        }

        consensus.Add(ConsensusResolver.GetConsensus(column));
    }
}
/// <summary>
/// Analyze the passed contig and store a consensus into its Consensus property.
/// The contig is walked one position at a time starting at 0; at each position the symbols
/// of every assembled sequence covering that position are collected (honouring the
/// sequence's reversed / complemented flags) and resolved to one consensus symbol.
/// The walk stops at the first position that no sequence covers.
/// </summary>
/// <param name="contig">Contig for which consensus is to be constructed</param>
private void MakeConsensus(Contig contig)
{
    List<byte> positionItems = new List<byte>();
    List<byte> consensusSequence = new List<byte>();

    // One cache slot per assembled sequence. A complemented copy is built the first time
    // it is needed and reused afterwards, instead of being rebuilt for every position.
    ISequence[] complements = new ISequence[contig.Sequences.Count];

    // there's no simple way to pre-guess the length of the contig
    long position = 0;
    while (true)
    {
        positionItems.Clear();

        int sequenceIndex = -1;
        foreach (Contig.AssembledSequence aseq in contig.Sequences)
        {
            sequenceIndex++;

            // Skip sequences that do not cover the current position.
            if (position < aseq.Position || position >= aseq.Position + aseq.Sequence.Count)
            {
                continue;
            }

            // A reversed sequence is read from its far end. The Count property is used
            // (not the LINQ Count() extension, which would enumerate the sequence).
            long seqPos = aseq.IsReversed
                ? (aseq.Sequence.Count - 1) - (position - aseq.Position)
                : position - aseq.Position;

            if (aseq.IsComplemented)
            {
                ISequence complement = complements[sequenceIndex];
                if (complement == null)
                {
                    complement = aseq.Sequence.GetComplementedSequence();
                    complements[sequenceIndex] = complement;
                }

                positionItems.Add(complement[seqPos]);
            }
            else
            {
                positionItems.Add(aseq.Sequence[seqPos]);
            }
        }

        if (positionItems.Count == 0)
        {
            // This means no sequences at this position. We're done
            contig.Consensus = new Sequence(
                Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet],
                consensusSequence.ToArray());
            return;
        }

        consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
        position++;
    }
}
/// <summary>
/// Analyze the passed contig and store a consensus into its Consensus property.
/// Public method to allow testing of consensus generation part.
/// Used by test automation.
/// The supplied alphabet becomes the assembler's current sequence alphabet; a
/// SimpleConsensusResolver is created when no resolver is set, otherwise the existing
/// resolver's SequenceAlphabet is updated.
/// </summary>
/// <param name="alphabet">Sequence alphabet</param>
/// <param name="contig">Contig for which consensus is to be constructed</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="alphabet"/> or <paramref name="contig"/> is null.
/// </exception>
public void MakeConsensus(IAlphabet alphabet, Contig contig)
{
    // Validate before touching any state so a bad call cannot leave the assembler
    // with a null alphabet or a half-configured resolver.
    if (alphabet == null)
    {
        throw new ArgumentNullException("alphabet");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    _sequenceAlphabet = alphabet;
    if (ConsensusResolver == null)
    {
        ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
    }
    else
    {
        ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
    }

    MakeConsensus(contig);
}
/// <summary>
/// Writes a single contig to the stream the formatter already has open.
/// </summary>
/// <param name="formatter">Formatter</param>
/// <param name="contig">Contig to write</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="formatter"/> or <paramref name="contig"/> is null.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when no open stream could be obtained for the formatter.
/// </exception>
public static void Format(this XsvContigFormatter formatter, Contig contig)
{
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    var fs = ParserFormatterExtensions<ISequenceFormatter>.GetOpenStream(formatter, true);
    if (fs == null)
    {
        // A specific exception type instead of the bare System.Exception; callers that
        // catch Exception still see it.
        throw new InvalidOperationException("You must open a formatter before calling Write.");
    }

    formatter.Write(fs, contig);
}
/// <summary>
/// Writes a textual description of a contig to the application log: a header line with
/// the sequence count and length, the consensus, one line per assembled sequence, and a
/// closing empty line.
/// </summary>
/// <param name="contig">contig to be dumped</param>
private static void Dump(Contig contig)
{
    // Header and consensus.
    ApplicationLog.WriteLine(
        "contig has {0} seqs, length {1}",
        contig.Sequences.Count,
        contig.Length);
    ApplicationLog.WriteLine("consensus: {0}", contig.Consensus);

    // One line per member sequence with its orientation flags and position.
    foreach (Contig.AssembledSequence member in contig.Sequences)
    {
        ApplicationLog.WriteLine(
            "seq (rev = {0} comp = {1} pos = {2}) {3}",
            member.IsReversed,
            member.IsComplemented,
            member.Position,
            member.Sequence);
    }

    // Empty line closes this contig's entry in the log.
    ApplicationLog.WriteLine(string.Empty);
}
/// <summary>
/// Creates (or overwrites) the named file and writes a single contig into it using the
/// given formatter. The file stream is disposed when writing finishes.
/// </summary>
/// <param name="formatter">Formatter</param>
/// <param name="contig">Contig to write</param>
/// <param name="filename">Filename</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="formatter"/> or <paramref name="contig"/> is null, or when
/// <paramref name="filename"/> is null, empty, or whitespace.
/// </exception>
public static void Format(this XsvContigFormatter formatter, Contig contig, string filename)
{
    if (formatter == null)
    {
        throw new ArgumentNullException("formatter");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    if (string.IsNullOrWhiteSpace(filename))
    {
        throw new ArgumentNullException("filename");
    }

    using (FileStream output = File.Create(filename))
    {
        formatter.Write(output, contig);
    }
}
/// <summary>
/// Merges the higher-indexed pool item, when that item is a plain sequence, into the contig
/// being built. The sequence is added unreversed and uncomplemented at the second offset of
/// the winning alignment.
/// </summary>
/// <param name="newContig">Contig under construction; receives the new assembled sequence.</param>
/// <param name="globalBest">Winning alignment score; its SecondOffset is the position used.</param>
/// <param name="consumedSequence">Consumed Sequence to be merged</param>
private static void MergeHigherIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
{
    // The higher-indexed item keeps its original orientation, hence both flags are false.
    Contig.AssembledSequence merged = new Contig.AssembledSequence
    {
        IsReversed = false,
        IsComplemented = false,
        Position = globalBest.SecondOffset,
        Sequence = SequenceWithoutTerminalGaps(consumedSequence)
    };

    newContig.Sequences.Add(merged);

    if (!Trace.Want(Trace.AssemblyDetails))
    {
        return;
    }

    ApplicationLog.WriteLine(
        "seq (rev = {0} comp = {1} pos = {2}) {3}",
        merged.IsReversed,
        merged.IsComplemented,
        merged.Position,
        merged.Sequence);
}
/// <summary>
/// Merges the lower-indexed pool item, when that item is a plain sequence, into the contig
/// being built. Orientation flags and position are taken from the winning alignment.
/// </summary>
/// <param name="newContig">Contig under construction; receives the new assembled sequence.</param>
/// <param name="globalBest">
/// Winning alignment score; supplies Reversed, Complemented and FirstOffset.
/// </param>
/// <param name="consumedSequence">Consumed Sequence to be merged</param>
private static void MergeLowerIndexedSequence(Contig newContig, ItemScore globalBest, ISequence consumedSequence)
{
    // The lower-indexed item may have been aligned reversed and/or complemented;
    // that information is retrieved from globalBest.
    Contig.AssembledSequence merged = new Contig.AssembledSequence
    {
        IsReversed = globalBest.Reversed,
        IsComplemented = globalBest.Complemented,
        Position = globalBest.FirstOffset,
        Sequence = SequenceWithoutTerminalGaps(consumedSequence)
    };

    newContig.Sequences.Add(merged);

    if (!Trace.Want(Trace.AssemblyDetails))
    {
        return;
    }

    ApplicationLog.WriteLine(
        "seq (rev = {0} comp = {1} pos = {2}) {3}",
        merged.IsReversed,
        merged.IsComplemented,
        merged.Position,
        merged.Sequence);
}
/// <summary>
/// Merges the lower-indexed pool item, when that item is a contig, into the contig being
/// built. Every assembled sequence of the consumed contig is re-oriented and re-positioned
/// according to the winning alignment and then added to the new contig.
/// </summary>
/// <param name="newContig">Contig under construction; receives the copied sequences.</param>
/// <param name="globalBest">Best Score along with offsets information</param>
/// <param name="consumedContig">Contig to be merged</param>
private static void MergeLowerIndexedContig(Contig newContig, ItemScore globalBest, Contig consumedContig)
{
    foreach (Contig.AssembledSequence source in consumedContig.Sequences)
    {
        // When the contig was aligned reversed, a member's distance from the contig's
        // right edge becomes its distance from the left edge; otherwise its position is
        // simply shifted by the first offset.
        long placement = globalBest.Reversed
            ? globalBest.FirstOffset + (consumedContig.Length - (source.Sequence.Count + source.Position))
            : globalBest.FirstOffset + source.Position;

        // Reversing a reversed sequence (or complementing a complemented one) is a no-op,
        // so the member's flags are combined with the alignment's flags via xor.
        Contig.AssembledSequence merged = new Contig.AssembledSequence
        {
            IsReversed = source.IsReversed ^ globalBest.Reversed,
            IsComplemented = source.IsComplemented ^ globalBest.Complemented,
            Position = placement,
            Sequence = SequenceWithoutTerminalGaps(source.Sequence)
        };

        newContig.Sequences.Add(merged);

        if (!Trace.Want(Trace.AssemblyDetails))
        {
            continue;
        }

        ApplicationLog.WriteLine(
            "\tseq (rev = {0} comp = {1} pos = {2}) {3}",
            merged.IsReversed,
            merged.IsComplemented,
            merged.Position,
            merged.Sequence);
    }
}
/// <summary>
/// Formats a (sparse) contig to a character-separated value stream: the consensus is
/// written first through the base formatter, then each assembled sequence is written in
/// turn. The offset written for an assembled sequence is read from that sequence's
/// metadata under XsvSparseParser.MetadataOffsetKey.
/// </summary>
/// <param name="stream">
/// Stream to write to. The writer used for the assembled sequences is opened with
/// leaveOpen set, so disposing it does not close the stream.
/// </param>
/// <param name="contig">The contig to format as a set of sparse sequences.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="stream"/> or <paramref name="contig"/> is null.
/// </exception>
public void Write(Stream stream, Contig contig)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    if (contig == null)
    {
        throw new ArgumentNullException("contig");
    }

    // Consensus goes out first.
    base.Format(stream, contig.Consensus);

    // Then every assembled sequence, each with the offset stored in its metadata.
    using (StreamWriter sequenceWriter = stream.OpenWrite(leaveOpen: true))
    {
        foreach (Contig.AssembledSequence member in contig.Sequences)
        {
            var sequence = member.Sequence;
            long offset = (long)sequence.Metadata[XsvSparseParser.MetadataOffsetKey];
            this.Write(sequenceWriter, sequence, offset);
        }
    }
}
/// <summary>
/// Assemble the input sequences into the largest possible contigs.
/// </summary>
/// <remarks>
/// The algorithm is:
/// 1. initialize list of contigs to empty list. List of seqs is passed as argument.
/// 2. compute pairwise overlap scores for each pair of input seqs (with reversal and
///    complementation as appropriate).
/// 3. choose best overlap score. the "merge items" (can be seqs or contigs) are the
///    items with that score. If best score is less than threshold, assembly is finished.
/// 4. merge the merge items into a single contig and remove them from their list(s)
/// 5. compute the overlap between new item and all existing items
/// 6. go to step 3
///
/// Implementation notes: all items live in a single pool list. Consumed items are never
/// removed from the pool; they are flagged through ConsumedBy so that pool indices (and
/// the per-item score lists indexed by them) stay stable.
/// </remarks>
/// <param name="inputSequences">The sequences to assemble.</param>
/// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
/// contigs and list of unmerged sequences which are result of this assembly.</returns>
/// <exception cref="ArgumentNullException">Thrown when inputSequences is null.</exception>
public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (null == inputSequences)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
    }

    // numbering convention: every pool item (whether sequence or contig)
    // gets a fixed number.
    // sequence index = index into inputs (which we won't modify)
    // contig index = nSequences + index into contigs
    // The input is enumerated exactly once here; everything below works off the pool.
    List<PoolItem> pool = inputSequences.Select(seq => new PoolItem(seq)).ToList();

    // Initialization
    // The alphabet of the first sequence is used for the whole assembly, and the
    // consensus resolver is created or re-targeted to that alphabet.
    int sequenceCount = pool.Count;
    if (sequenceCount > 0)
    {
        _sequenceAlphabet = pool[0].Sequence.Alphabet;
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
        }
    }

    // put all the initial sequences into the pool, and generate the pair scores.
    // there are no contigs in the pool yet.
    // to save an iteration, we'll also find the best global score as we go.
    ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
    int globalBestLargerIndex = -1;
    int unconsumedCount = sequenceCount;

    // Compute alignment scores for all combinations between input sequences
    // Store these scores in the poolItem corresponding to each sequence
    // (item i ends up holding its scores against items 0..i-1, in index order).
    for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
    {
        PoolItem newItem = pool[newSeq];
        for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
        {
            PoolItem oldItem = pool[oldSeq];
            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
            newItem.Scores.Add(score);
            if (score.OverlapScore > globalBest.OverlapScore)
            {
                globalBest = new ItemScore(score);
                globalBestLargerIndex = newSeq;
            }
        }
    }

    // Merge sequence if best score is above threshold
    // and add new contig to pool
    if (globalBest.OverlapScore >= MergeThreshold)
    {
        if (Trace.Want(Trace.AssemblyDetails))
        {
            ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
        }

        // NOTE(review): globalBest.OtherItem is used as the index of the lower-indexed
        // partner; presumably AlignSequence stores its oldSeq argument there - confirm.
        PoolItem mergeItem1 = pool[globalBest.OtherItem];
        PoolItem mergeItem2 = pool[globalBestLargerIndex];
        Contig newContig = new Contig();
        if (Trace.Want(Trace.AssemblyDetails))
        {
            ApplicationLog.WriteLine(
                "new pool item {0} will merge old items {1} and {2}",
                pool.Count, globalBest.OtherItem, globalBestLargerIndex);
        }

        // The pool holds only plain sequences at this point, so the sequence
        // variants of the merge helpers are used unconditionally.
        MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
        MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
        MakeConsensus(newContig);

        // Set ConsumedBy value and
        // free memory as these sequences are no longer used
        // (pool.Count is the index the new contig receives once it is added below).
        mergeItem1.ConsumedBy = pool.Count;
        mergeItem2.ConsumedBy = pool.Count;
        mergeItem1.FreeSequences();
        mergeItem2.FreeSequences();
        pool.Add(new PoolItem(newContig));

        // Two items were consumed and one was added: net change of minus one.
        unconsumedCount--;

        while (unconsumedCount > 1)
        {
            // Compute scores for each unconsumed sequence with new contig
            // (the new contig is always the last pool entry).
            int newSeq = pool.Count - 1;
            PoolItem newItem = pool[newSeq];
            for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
            {
                PoolItem oldItem = pool[oldSeq];
                if (oldItem.ConsumedBy >= 0)
                {
                    // already consumed - just add dummy score to maintain correct indices
                    newItem.Scores.Add(new ItemScore());
                }
                else
                {
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                }
            }

            // find best global score in the modified pool.
            // Only pairs in which both items are still unconsumed are considered.
            globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            globalBestLargerIndex = -1;
            for (int current = 0; current < pool.Count; ++current)
            {
                PoolItem curItem = pool[current];
                if (curItem.ConsumedBy < 0)
                {
                    for (int other = 0; other < current; ++other)
                    {
                        if (pool[other].ConsumedBy < 0)
                        {
                            ItemScore itemScore = curItem.Scores[other];
                            if (itemScore.OverlapScore > globalBest.OverlapScore)
                            {
                                globalBest = new ItemScore(itemScore); // copy the winner so far
                                globalBestLargerIndex = current;
                            }
                        }
                    }
                }
            }

            if (globalBest.OverlapScore >= MergeThreshold)
            {
                // Merge sequences / contigs if above threshold
                mergeItem1 = pool[globalBest.OtherItem];
                mergeItem2 = pool[globalBestLargerIndex];
                newContig = new Contig();

                // Lower-indexed partner: may be a contig or a plain sequence.
                if (mergeItem1.IsContig)
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                            globalBest.OtherItem, globalBest.Reversed, globalBest.Complemented, globalBest.FirstOffset);
                    }

                    MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                }
                else
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                            globalBest.OtherItem, globalBest.Reversed, globalBest.Complemented, globalBest.FirstOffset);
                    }

                    MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                }

                // Higher-indexed partner: may be a contig or a plain sequence.
                if (mergeItem2.IsContig)
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a contig (offset = {1}",
                            globalBestLargerIndex, globalBest.SecondOffset);
                    }

                    MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                }
                else
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a sequence (offset = {1}",
                            globalBestLargerIndex, globalBest.SecondOffset);
                    }

                    MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                }

                MakeConsensus(newContig);
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    Dump(newContig);
                }

                // Set ConsumedBy value for these poolItems and
                // free memory as these sequences are no longer used
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));
                unconsumedCount--;
            }
            else
            {
                // None of the alignment scores cross threshold
                // No more merges possible. So end iteration.
                break;
            }
        }
    }

    // no further qualifying merges, so we're done.
    // populate contigs and unmergedSequences
    // (every pool item that was never consumed is part of the result).
    OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
    foreach (PoolItem curItem in pool)
    {
        if (curItem.ConsumedBy < 0)
        {
            if (curItem.IsContig)
            {
                sequenceAssembly.Contigs.Add(curItem.Contig);
            }
            else
            {
                sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
            }
        }
    }

    return sequenceAssembly;
}
/// <summary>
/// Analyze the passed contig and store a consensus into its Consensus property.
/// Positions are visited from 0 upwards. For each position the symbols of all assembled
/// sequences covering it are gathered, taking each sequence's reversed and complemented
/// flags into account, and resolved to a single consensus symbol. The first position
/// covered by no sequence terminates the consensus.
/// </summary>
/// <param name="contig">Contig for which consensus is to be constructed</param>
private void MakeConsensus(Contig contig)
{
    List<byte> positionItems = new List<byte>(), consensusSequence = new List<byte>();

    // Each assembled sequence is paired with a complemented copy that is created on first
    // use only; previously a complete complemented sequence was built for every position.
    var strands = contig.Sequences
        .Select(aseq => new
        {
            Assembled = aseq,
            Complement = new Lazy<ISequence>(() => aseq.Sequence.GetComplementedSequence())
        })
        .ToList();

    // there's no simple way to pre-guess the length of the contig
    long position = 0;
    while (true)
    {
        // Initialization
        positionItems.Clear();

        // Add the sequences. The sequence length is read through the Count property
        // rather than the LINQ Count() extension, which would enumerate the sequence.
        positionItems.AddRange(
            from strand in strands
            let aseq = strand.Assembled
            where position >= aseq.Position && position < aseq.Position + aseq.Sequence.Count
            let seqPos = aseq.IsReversed
                ? (aseq.Sequence.Count - 1) - (position - aseq.Position)
                : position - aseq.Position
            select aseq.IsComplemented ? strand.Complement.Value[seqPos] : aseq.Sequence[seqPos]);

        if (positionItems.Count == 0)
        {
            // This means no sequences at this position. We're done
            contig.Consensus = new Sequence(
                Alphabets.AmbiguousAlphabetMap[_sequenceAlphabet],
                consensusSequence.ToArray());
            return;
        }

        consensusSequence.Add(ConsensusResolver.GetConsensus(positionItems.ToArray()));
        position++;
    }
}
/// <summary>
/// Initializes a new instance of the PoolItem class.
/// Constructor for a contig: delegates to the two-argument constructor, passing
/// <c>true</c> as the second argument.
/// NOTE(review): the second argument presumably marks the item as a contig - confirm
/// against the two-argument overload, which is not visible here.
/// </summary>
/// <param name="item">Contig to be held by the pool item</param>
internal PoolItem(Contig item)
    : this(item, true)
{
}
/// <summary>
/// Assemble the input sequences into the largest possible contigs.
/// </summary>
/// <remarks>
/// The algorithm is:
/// 1. initialize list of contigs to empty list. List of seqs is passed as argument.
/// 2. compute pairwise overlap scores for each pair of input seqs (with reversal and
///    complementation as appropriate).
/// 3. choose best overlap score. the "merge items" (can be seqs or contigs) are the
///    items with that score. If best score is less than threshold, assembly is finished.
/// 4. merge the merge items into a single contig and remove them from their list(s)
/// 5. compute the overlap between new item and all existing items
/// 6. go to step 3
///
/// Implementation notes: the input is enumerated exactly once, into the pool. Consumed
/// items are never removed from the pool; they are flagged through ConsumedBy so that
/// pool indices (and the per-item score lists indexed by them) stay stable.
/// </remarks>
/// <param name="inputSequences">The sequences to assemble.</param>
/// <returns>Returns the OverlapDeNovoAssembly instance which contains list of
/// contigs and list of unmerged sequences which are result of this assembly.</returns>
/// <exception cref="ArgumentNullException">Thrown when inputSequences is null.</exception>
public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (null == inputSequences)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameInputSequences);
    }

    // numbering convention: every pool item (whether sequence or contig)
    // gets a fixed number.
    // sequence index = index into inputs (which we won't modify)
    // contig index = nSequences + index into contigs
    //
    // Materialize the input once. Counting, peeking at the first element and filling the
    // pool used to be separate enumerations, which is wasteful for lazy sources and can
    // disagree with each other for sources that yield different results per pass.
    List<PoolItem> pool = inputSequences.Select(seq => new PoolItem(seq)).ToList();

    // Initializations
    // The alphabet of the first sequence is used for the whole assembly.
    if (pool.Count > 0)
    {
        _sequenceAlphabet = pool[0].Sequence.Alphabet;
        if (ConsensusResolver == null)
        {
            ConsensusResolver = new SimpleConsensusResolver(_sequenceAlphabet);
        }
        else
        {
            ConsensusResolver.SequenceAlphabet = _sequenceAlphabet;
        }
    }

    // put all the initial sequences into the pool, and generate the pair scores.
    // there are no contigs in the pool yet.
    // to save an iteration, we'll also find the best global score as we go.
    ItemScore globalBest = new ItemScore(-1, -1, false, false, 0, 0);
    int globalBestLargerIndex = -1;
    int unconsumedCount = pool.Count;

    // Compute alignment scores for all combinations between input sequences
    // Store these scores in the poolItem corresponding to each sequence
    // (item i ends up holding its scores against items 0..i-1, in index order).
    for (int newSeq = 0; newSeq < pool.Count; ++newSeq)
    {
        PoolItem newItem = pool[newSeq];
        for (int oldSeq = 0; oldSeq < newSeq; ++oldSeq)
        {
            PoolItem oldItem = pool[oldSeq];
            ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
            newItem.Scores.Add(score);
            if (score.OverlapScore > globalBest.OverlapScore)
            {
                globalBest = new ItemScore(score);
                globalBestLargerIndex = newSeq;
            }
        }
    }

    // Merge sequence if best score is above threshold
    // and add new contig to pool
    if (globalBest.OverlapScore >= MergeThreshold)
    {
        if (Trace.Want(Trace.AssemblyDetails))
        {
            ApplicationLog.WriteLine("Merging (overlap score {0}):", globalBest.OverlapScore);
        }

        PoolItem mergeItem1 = pool[globalBest.OtherItem];
        PoolItem mergeItem2 = pool[globalBestLargerIndex];
        Contig newContig = new Contig();
        if (Trace.Want(Trace.AssemblyDetails))
        {
            ApplicationLog.WriteLine(
                "new pool item {0} will merge old items {1} and {2}",
                pool.Count, globalBest.OtherItem, globalBestLargerIndex);
        }

        // The pool holds only plain sequences at this point, so the sequence
        // variants of the merge helpers are used unconditionally.
        MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
        MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
        MakeConsensus(newContig);

        // Set ConsumedBy value and
        // free memory as these sequences are no longer used
        // (pool.Count is the index the new contig receives once it is added below).
        mergeItem1.ConsumedBy = pool.Count;
        mergeItem2.ConsumedBy = pool.Count;
        mergeItem1.FreeSequences();
        mergeItem2.FreeSequences();
        pool.Add(new PoolItem(newContig));

        // Two items were consumed and one was added: net change of minus one.
        unconsumedCount--;

        while (unconsumedCount > 1)
        {
            // Compute scores for each unconsumed sequence with new contig
            // (the new contig is always the last pool entry).
            int newSeq = pool.Count - 1;
            PoolItem newItem = pool[newSeq];
            for (int oldSeq = 0; oldSeq < pool.Count - 1; ++oldSeq)
            {
                PoolItem oldItem = pool[oldSeq];
                if (oldItem.ConsumedBy >= 0)
                {
                    // already consumed - just add dummy score to maintain correct indices
                    newItem.Scores.Add(new ItemScore());
                }
                else
                {
                    ItemScore score = AlignSequence(oldItem.SequenceOrConsensus, newItem.SequenceOrConsensus, oldSeq, newSeq);
                    newItem.Scores.Add(score);
                }
            }

            // find best global score in the modified pool.
            // Only pairs in which both items are still unconsumed are considered.
            globalBest = new ItemScore(-1, -1, false, false, 0, 0);
            globalBestLargerIndex = -1;
            for (int current = 0; current < pool.Count; ++current)
            {
                PoolItem curItem = pool[current];
                if (curItem.ConsumedBy < 0)
                {
                    for (int other = 0; other < current; ++other)
                    {
                        if (pool[other].ConsumedBy < 0)
                        {
                            ItemScore itemScore = curItem.Scores[other];
                            if (itemScore.OverlapScore > globalBest.OverlapScore)
                            {
                                globalBest = new ItemScore(itemScore); // copy the winner so far
                                globalBestLargerIndex = current;
                            }
                        }
                    }
                }
            }

            if (globalBest.OverlapScore >= MergeThreshold)
            {
                // Merge sequences / contigs if above threshold
                mergeItem1 = pool[globalBest.OtherItem];
                mergeItem2 = pool[globalBestLargerIndex];
                newContig = new Contig();

                // Lower-indexed partner: may be a contig or a plain sequence.
                if (mergeItem1.IsContig)
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a contig (reversed = {1}, complemented = {2}, offset = {3}",
                            globalBest.OtherItem, globalBest.Reversed, globalBest.Complemented, globalBest.FirstOffset);
                    }

                    MergeLowerIndexedContig(newContig, globalBest, mergeItem1.Contig);
                }
                else
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a sequence (reversed = {1}, complemented = {2}, offset = {3}",
                            globalBest.OtherItem, globalBest.Reversed, globalBest.Complemented, globalBest.FirstOffset);
                    }

                    MergeLowerIndexedSequence(newContig, globalBest, mergeItem1.Sequence);
                }

                // Higher-indexed partner: may be a contig or a plain sequence.
                if (mergeItem2.IsContig)
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a contig (offset = {1}",
                            globalBestLargerIndex, globalBest.SecondOffset);
                    }

                    MergeHigherIndexedContig(newContig, globalBest, mergeItem2.Contig);
                }
                else
                {
                    if (Trace.Want(Trace.AssemblyDetails))
                    {
                        ApplicationLog.WriteLine(
                            "item {0} is a sequence (offset = {1}",
                            globalBestLargerIndex, globalBest.SecondOffset);
                    }

                    MergeHigherIndexedSequence(newContig, globalBest, mergeItem2.Sequence);
                }

                MakeConsensus(newContig);
                if (Trace.Want(Trace.AssemblyDetails))
                {
                    Dump(newContig);
                }

                // Set ConsumedBy value for these poolItems and
                // free memory as these sequences are no longer used
                mergeItem1.ConsumedBy = pool.Count;
                mergeItem2.ConsumedBy = pool.Count;
                mergeItem1.FreeSequences();
                mergeItem2.FreeSequences();
                pool.Add(new PoolItem(newContig));
                unconsumedCount--;
            }
            else
            {
                // None of the alignment scores cross threshold
                // No more merges possible. So end iteration.
                break;
            }
        }
    }

    // no further qualifying merges, so we're done.
    // populate contigs and unmergedSequences
    // (every pool item that was never consumed is part of the result).
    OverlapDeNovoAssembly sequenceAssembly = new OverlapDeNovoAssembly();
    foreach (PoolItem curItem in pool)
    {
        if (curItem.ConsumedBy < 0)
        {
            if (curItem.IsContig)
            {
                sequenceAssembly.Contigs.Add(curItem.Contig);
            }
            else
            {
                sequenceAssembly.UnmergedSequences.Add(curItem.Sequence);
            }
        }
    }

    return sequenceAssembly;
}