/// <summary>
/// Command-line driver for layout refinement: loads the query reads from FilePath[1]
/// and the delta alignments from FilePath[0], runs LayoutRefiner.RefineLayout, and hands
/// the result to WriteDelta / WriteSortedDelta (the latter receives OutputFile).
/// When Verbose is set, file sizes and timings are reported through Output.
/// </summary>
public void RefineLayout()
{
    TimeSpan computeTime = TimeSpan.Zero;
    Stopwatch watch = new Stopwatch();

    // Step 1: parse the query FastA file and cache its sequences.
    watch.Restart();
    string queryPath = this.FilePath[1];
    long fileSize = new FileInfo(queryPath).Length;

    // NOTE(review): the parser is used after its stream is disposed; presumably
    // CacheSequencesForRandomAccess makes that safe - confirm.
    FastASequencePositionParser queryParser;
    using (var queryStream = File.OpenRead(queryPath))
    {
        queryParser = new FastASequencePositionParser(queryStream, true);
        queryParser.CacheSequencesForRandomAccess();
    }

    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(queryPath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", watch.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    // Step 2: open the delta alignment file.
    string deltaPath = this.FilePath[0];
    fileSize = new FileInfo(deltaPath).Length;

    watch.Restart();
    using (var deltaStream = File.OpenRead(deltaPath))
    using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(deltaStream, queryParser))
    {
        watch.Stop();

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(deltaPath));
            Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", watch.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
        }

        // Step 3: refine and write the unsorted deltas; this interval is the "compute" time.
        watch.Restart();
        IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefiner.RefineLayout(deltas);
        DeltaAlignmentSorter sorter = new DeltaAlignmentSorter();
        WriteDelta(refinedDeltas, sorter, UnsortedLayoutRefinmentOutputFilename);
        watch.Stop();
        computeTime += watch.Elapsed;

        // Step 4: write the sorted output; this interval is the "write" time.
        watch.Restart();
        WriteSortedDelta(sorter, UnsortedLayoutRefinmentOutputFilename, queryParser, this.OutputFile);
        watch.Stop();
    }

    if (!this.Verbose)
    {
        return;
    }

    Output.WriteLine(OutputLevel.Verbose);
    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", computeTime);
    Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", watch.Elapsed);
}
/// <summary>
/// Initializes a new instance of the VirtualDeltaAlignmentCollection class.
/// </summary>
/// <param name="orderedDeltas">
/// Source collection. Its Count is copied, and the enumerator of
/// DeltaAlignmentParser.Parse() becomes the source of deltas for this instance.
/// </param>
/// <param name="windowSize">
/// Window size stored on the instance; the cache size is set to 1000 times this value.
/// </param>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public VirtualDeltaAlignmentCollection(DeltaAlignmentCollection orderedDeltas, int windowSize)
{
    // Fail with a descriptive exception instead of a NullReferenceException below.
    if (orderedDeltas == null)
    {
        throw new System.ArgumentNullException("orderedDeltas");
    }

    this.windowSize = windowSize;
    this.Count = orderedDeltas.Count;
    this.catchedDeltas = new List<DeltaAlignment>();
    this.catchSize = windowSize * 1000;
    this.sourceDeltas = orderedDeltas.DeltaAlignmentParser.Parse().GetEnumerator();
}
/// <summary>
/// Command-line driver for repeat resolution: opens the delta alignments (FilePath[0])
/// and the query reads (FilePath[1]), runs RepeatResolver.ResolveAmbiguity, and writes
/// the result through WriteUnsortedDelta followed by WriteDelta.
/// When Verbose is set, file sizes and timings are written to standard error.
/// </summary>
public void ResolveAmbiguity()
{
    TimeSpan computeTime = TimeSpan.Zero;
    Stopwatch watch = new Stopwatch();

    // Only the file length is read here; the reads themselves are opened further down.
    watch.Restart();
    string queryPath = this.FilePath[1];
    long fileSize = new FileInfo(queryPath).Length;
    watch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Query FastA file: {0}", Path.GetFullPath(queryPath));
        Console.Error.WriteLine(" Read/Processing time: {0}", watch.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    string deltaPath = this.FilePath[0];
    fileSize = new FileInfo(deltaPath).Length;

    watch.Restart();
    DeltaAlignmentSorter sorter = null;
    using (var deltaStream = File.OpenRead(deltaPath))
    using (var queryStream = File.OpenRead(queryPath))
    using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(deltaStream, queryStream))
    {
        watch.Stop();

        if (this.Verbose)
        {
            Console.Error.WriteLine();
            Console.Error.WriteLine(" Processed DeltaAlignment file: {0}", Path.GetFullPath(deltaPath));
            Console.Error.WriteLine(" Read/Processing time: {0}", watch.Elapsed);
            Console.Error.WriteLine(" File Size : {0}", fileSize);
        }

        watch.Restart();
        IEnumerable<DeltaAlignment> resolvedDeltas = RepeatResolver.ResolveAmbiguity(deltas);
        sorter = new DeltaAlignmentSorter();
        WriteUnsortedDelta(resolvedDeltas, sorter);
    }

    // The compute interval ends only after the collection and both streams are disposed.
    watch.Stop();
    computeTime += watch.Elapsed;

    watch.Restart();
    this.WriteDelta(sorter);
    watch.Stop();

    if (!this.Verbose)
    {
        return;
    }

    Console.Error.WriteLine(" Compute time: {0}", computeTime);
    Console.Error.WriteLine(" Write() time: {0}", watch.Elapsed);
}
/// <summary>
/// Command-line driver for consensus generation: opens the delta alignments (FilePath[0])
/// together with the query reads (FilePath[1]), runs ConsensusGeneration.GenerateConsensus,
/// and passes the resulting sequences to WriteSequences.
/// When Verbose is set, file sizes and timings are reported through Output.
/// </summary>
public void GenerateConsensus()
{
    TimeSpan timeSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();

    // Only the file length is read here; the reads are opened by DeltaAlignmentCollection below.
    runAlgorithm.Restart();
    long inputFileLength = new FileInfo(this.FilePath[1]).Length;
    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(this.FilePath[1]));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileLength);
    }

    inputFileLength = new FileInfo(this.FilePath[0]).Length;

    runAlgorithm.Restart();
    using (DeltaAlignmentCollection deltaCollection = new DeltaAlignmentCollection(this.FilePath[0], this.FilePath[1]))
    {
        runAlgorithm.Stop();

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(this.FilePath[0]));
            Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileLength);
        }

        // NOTE(review): if GenerateConsensus returns a lazy sequence, most of the work
        // happens while WriteSequences enumerates it, so it is counted as write time - confirm.
        runAlgorithm.Restart();
        IEnumerable<ISequence> consensus = ConsensusGeneration.GenerateConsensus(deltaCollection);
        runAlgorithm.Stop();
        timeSpan = timeSpan.Add(runAlgorithm.Elapsed);

        runAlgorithm.Restart();
        this.WriteSequences(consensus);
        runAlgorithm.Stop();
    }

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", timeSpan);
        Output.WriteLine(OutputLevel.Verbose, "Write() time: {0}", runAlgorithm.Elapsed);
    }
}
/// <summary>
/// Generates consensus sequences (contigs) from an alignment layout.
/// The argument is validated immediately; the contigs themselves are produced lazily
/// as the returned sequence is enumerated.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">
/// Delta alignments, read by index in ascending order. A delta joins the current contig
/// only when its FirstSequenceStart equals the current reference position, so the
/// collection is presumably expected to be ordered by FirstSequenceStart - confirm.
/// </param>
/// <returns>One Sequence over AmbiguousDnaAlphabet per contig.</returns>
/// <exception cref="ArgumentNullException">Thrown when the collection is null.</exception>
public static IEnumerable<ISequence> GenerateConsensus(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // Validate here rather than in the iterator: an iterator body does not run until
    // the first MoveNext, which would delay the exception to the point of enumeration.
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    return GenerateConsensusIterator(alignmentBetweenReferenceAndReads);
}

/// <summary>
/// Lazy implementation behind GenerateConsensus; walks the reference position by position
/// and emits one contig each time no delta covers the next position.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">Non-null delta alignment collection.</param>
/// <returns>One Sequence per contig.</returns>
private static IEnumerable<ISequence> GenerateConsensusIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // The second argument (49) is passed through to SimpleConsensusResolver unchanged;
    // its meaning is defined by that class.
    SimpleConsensusResolver resolver = new SimpleConsensusResolver(AmbiguousDnaAlphabet.Instance, 49);

    // Deltas currently covering the reference position, mapped to their sequences.
    // This is not expected to grow beyond a few hundred entries.
    Dictionary<DeltaAlignment, ISequence> deltasInCurrentContig = new Dictionary<DeltaAlignment, ISequence>();
    long currentAlignmentStartOffset = 0;
    long currentIndex = 0;
    long inDeltaIndex = 0;
    DeltaAlignment lastDelta;
    List<byte> currentContig = new List<byte>();
    List<DeltaAlignment> deltasToRemove = new List<DeltaAlignment>();

    // No deltas, no contigs.
    if (alignmentBetweenReferenceAndReads.Count == 0)
    {
        yield break;
    }

    long index = 0;
    lastDelta = alignmentBetweenReferenceAndReads[index];
    do
    {
        // Starting a new contig at the next unconsumed delta.
        if (deltasInCurrentContig.Count == 0)
        {
            currentAlignmentStartOffset = lastDelta.FirstSequenceStart;
            currentIndex = 0;
            currentContig.Clear();
        }

        // Advance one reference position per iteration while any delta covers it.
        do
        {
            // Pull in every delta that starts exactly at the current position.
            while (lastDelta != null && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                // Fetch the next delta, or null once the collection is exhausted.
                index++;
                if (alignmentBetweenReferenceAndReads.Count > index)
                {
                    lastDelta = alignmentBetweenReferenceAndReads[index];
                    continue; // see if the new delta starts from the same offset
                }
                else
                {
                    lastDelta = null;
                }
            }

            // Collect the symbol each active delta contributes at this position.
            byte[] symbolsAtCurrentIndex = new byte[deltasInCurrentContig.Count];
            int symbolCounter = 0;
            foreach (var delta in deltasInCurrentContig)
            {
                inDeltaIndex = currentIndex - (delta.Key.FirstSequenceStart - currentAlignmentStartOffset);
                symbolsAtCurrentIndex[symbolCounter++] = delta.Value[inDeltaIndex];

                // The delta ends here; removal is deferred because the dictionary is being enumerated.
                if (inDeltaIndex == delta.Value.Count - 1)
                {
                    deltasToRemove.Add(delta.Key);
                }
            }

            if (deltasToRemove.Count > 0)
            {
                for (int i = 0; i < deltasToRemove.Count; i++)
                {
                    deltasInCurrentContig.Remove(deltasToRemove[i]);
                }

                deltasToRemove.Clear();
            }

            byte consensusSymbol = resolver.GetConsensus(symbolsAtCurrentIndex);
            currentContig.Add(consensusSymbol);

            currentIndex++;

            // All deltas ended, but the next one starts right at the following position:
            // keep extending the same contig instead of starting a new one.
            if (deltasInCurrentContig.Count == 0
                && lastDelta != null
                && lastDelta.FirstSequenceStart == currentAlignmentStartOffset + currentIndex)
            {
                deltasInCurrentContig.Add(lastDelta, GetSequenceFromDelta(lastDelta));

                index++;
                if (alignmentBetweenReferenceAndReads.Count > index)
                {
                    lastDelta = alignmentBetweenReferenceAndReads[index];
                    continue; // read next delta to see if it starts from current reference sequence offset
                }
                else
                {
                    lastDelta = null;
                }
            }
        }
        while (deltasInCurrentContig.Count > 0);

        yield return new Sequence(AmbiguousDnaAlphabet.Instance, currentContig.ToArray(), false);
    }
    while (lastDelta != null);
}
/// <summary>
/// Refines alignment layout by taking into consideration indels (insertions and deletions)
/// and rearrangements between two genomes.
/// The argument is validated immediately; the refined deltas are produced lazily as the
/// returned sequence is enumerated.
/// </summary>
/// <param name="orderedDeltas">Ordered deltas.</param>
/// <returns>
/// The deltas as they are released from the internal cache, with FirstSequenceStart and
/// FirstSequenceEnd adjusted where gaps were closed or deltas were extended.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
{
    // Validate here rather than in the iterator: an iterator body does not run until
    // the first MoveNext, which would delay the exception to the point of enumeration.
    if (orderedDeltas == null)
    {
        throw new ArgumentNullException("orderedDeltas");
    }

    return RefineLayoutIterator(orderedDeltas);
}

/// <summary>
/// Lazy implementation behind RefineLayout. Walks the deltas in order, keeps the group
/// of deltas overlapping the current position, and at each boundary between groups
/// either extends adjacent deltas (ExtendDeltas) or closes the gap when most left-side
/// reads continue on the right side.
/// </summary>
/// <param name="orderedDeltas">Non-null ordered deltas.</param>
/// <returns>Deltas released from the cache, in release order.</returns>
private static IEnumerable<DeltaAlignment> RefineLayoutIterator(DeltaAlignmentCollection orderedDeltas)
{
    if (orderedDeltas.Count == 0)
    {
        yield break;
    }

    // The maximum possible insert/delete size is unknown, so a fixed window is assumed.
    // windowSize also caps the overlap group below; the cache derives its own capacity from it.
    int windowSize = 1000;

    VirtualDeltaAlignmentCollection deltaCatche = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

    List<DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
    List<DeltaAlignment> leftSideDeltas = null;
    List<DeltaAlignment> rightSideDeltas = null;
    List<DeltaAlignment> unloadedDeltas = null;
    try
    {
        deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
        leftSideDeltas = new List<DeltaAlignment>();
        rightSideDeltas = new List<DeltaAlignment>();

        // Cumulative shift applied to every subsequent delta's reference coordinates.
        long currentProcessedOffset = 0;
        DeltaAlignment alignment = deltaCatche[0];
        deltasOverlappingAtCurrentIndex.Add(alignment);
        DeltaAlignment deltaWithLargestEndIndex = alignment;

        for (int currentIndex = 0; currentIndex < deltaCatche.Count - 1; currentIndex++)
        {
            DeltaAlignment nextDelta = deltaCatche[currentIndex + 1];

            // Emit whatever the cache is willing to release at this point.
            unloadedDeltas = null;
            if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
            {
                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }

            if (currentProcessedOffset != 0)
            {
                nextDelta.FirstSequenceStart += currentProcessedOffset;
                nextDelta.FirstSequenceEnd += currentProcessedOffset;
            }

            // Check if next delta is just adjacent
            if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // If next delta is adjacent there is a possible insertion in target (deletion in reference).
                // Try to extend the deltas from both sides and make them meet.
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the adjacent right side
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                {
                    DeltaAlignment delta = deltaCatche[index];
                    unloadedDeltas = null;

                    // The unload point stays at currentIndex + 1 (not index) while scanning ahead.
                    if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                if (offset != 0)
                {
                    nextDelta.FirstSequenceStart += offset;
                    nextDelta.FirstSequenceEnd += offset;
                }

                currentProcessedOffset += offset;
            }
            else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // Next delta overlaps with the current overlap group
                deltasOverlappingAtCurrentIndex.Add(nextDelta);

                // Check if nextDelta is reaching farther than the current farthest delta
                if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    deltaWithLargestEndIndex = nextDelta;
                }

                // Keep the group bounded: drop members that end before the farthest end.
                if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                {
                    for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                    {
                        if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltasOverlappingAtCurrentIndex.RemoveAt(i);
                        }
                    }
                }
            }
            else
            {
                // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex.
                // Try to fix an insertion in reference by pulling together the two ends of
                // deltas on both sides of the gap.
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the right end of the gap
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCatche.Count; index++)
                {
                    DeltaAlignment delta = deltaCatche[index];
                    unloadedDeltas = null;
                    if (deltaCatche.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                // +1 for each left-side delta whose read also appears on the right side, -1 otherwise.
                int score = 0;
                for (int i = 0; i < leftSideDeltas.Count; i++)
                {
                    var l = leftSideDeltas[i];
                    int j = 0;

                    for (; j < rightSideDeltas.Count; j++)
                    {
                        var r = rightSideDeltas[j];

                        // A reference check is not possible here, so IDs are compared instead;
                        // IDs are unique for a given read.
                        if (l.QuerySequence.ID == r.QuerySequence.ID)
                        {
                            score++;
                            break;
                        }
                    }

                    if (j == rightSideDeltas.Count)
                    {
                        score--;
                    }
                }

                // Score > 0 means most deltas share the same query sequence at both ends, so close this gap
                if (score > 0)
                {
                    long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                    currentProcessedOffset -= gaplength;

                    // Pull deltas on right side to close the gap.
                    // NOTE(review): right-side deltas other than nextDelta are shifted here and will
                    // also receive currentProcessedOffset when they later become nextDelta - confirm
                    // whether the cache hands back the same instances.
                    for (int i = 0; i < rightSideDeltas.Count; i++)
                    {
                        DeltaAlignment delta = rightSideDeltas[i];
                        delta.FirstSequenceStart -= gaplength;
                        delta.FirstSequenceEnd -= gaplength;
                    }
                }

                // Start a new group from the right side of the gap
                deltaWithLargestEndIndex = nextDelta;
                deltasOverlappingAtCurrentIndex.Clear();
                deltasOverlappingAtCurrentIndex.Add(nextDelta);
            }
        }

        // Flush everything still held by the cache.
        unloadedDeltas = deltaCatche.GetCachedDeltas();
        for (int i = 0; i < unloadedDeltas.Count; i++)
        {
            yield return unloadedDeltas[i];
        }

        unloadedDeltas.Clear();
    }
    finally
    {
        // Runs on normal completion and when the enumerator is disposed early.
        // NOTE(review): this only clears locals; it is likely removable once ExtendDeltas
        // is confirmed not to retain the lists.
        if (deltasOverlappingAtCurrentIndex != null)
        {
            deltasOverlappingAtCurrentIndex.Clear();
            deltasOverlappingAtCurrentIndex = null;
        }

        if (leftSideDeltas != null)
        {
            leftSideDeltas.Clear();
            leftSideDeltas = null;
        }

        if (rightSideDeltas != null)
        {
            rightSideDeltas.Clear();
            rightSideDeltas = null;
        }

        if (deltaCatche != null)
        {
            deltaCatche = null;
        }
    }
}
/// <summary>
/// Resolves reads that are placed ambiguously (more than one delta per read).
/// Mate pair information is used to choose among the candidate placements.
/// The argument is validated immediately; deltas are produced lazily as the returned
/// sequence is enumerated.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">Alignment between reference genome and reads.</param>
/// <returns>
/// Deltas after repeat resolution. Reads with a single delta, with any partial alignment,
/// without a valid paired-read ID, or without mate deltas are passed through unchanged.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when the collection is null.</exception>
public static IEnumerable<DeltaAlignment> ResolveAmbiguity(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    // Validate here rather than in the iterator: an iterator body does not run until
    // the first MoveNext, which would delay the exception to the point of enumeration.
    if (alignmentBetweenReferenceAndReads == null)
    {
        throw new ArgumentNullException("alignmentBetweenReferenceAndReads");
    }

    return ResolveAmbiguityIterator(alignmentBetweenReferenceAndReads);
}

/// <summary>
/// Lazy implementation behind ResolveAmbiguity; processes the deltas one read at a time.
/// </summary>
/// <param name="alignmentBetweenReferenceAndReads">Non-null alignment collection.</param>
/// <returns>Deltas after repeat resolution.</returns>
private static IEnumerable<DeltaAlignment> ResolveAmbiguityIterator(DeltaAlignmentCollection alignmentBetweenReferenceAndReads)
{
    foreach (var curReadDeltas in alignmentBetweenReferenceAndReads.GetDeltaAlignmentsByReads())
    {
        if (curReadDeltas == null)
        {
            continue;
        }

        int deltasInCurrentRead = curReadDeltas.Count;
        if (deltasInCurrentRead == 0)
        {
            continue;
        }

        // A single delta means there is no repeat to resolve. If any delta is a partial
        // alignment (does not reach the end of the query), resolution is not attempted either.
        if (deltasInCurrentRead == 1
            || curReadDeltas.Any(a => a.SecondSequenceEnd != a.QuerySequence.Count - 1))
        {
            foreach (var delta in curReadDeltas)
            {
                yield return delta;
            }

            continue;
        }

        // Resolve repeats
        string sequenceId = curReadDeltas[0].QuerySequence.ID;
        string originalSequenceId;
        bool forwardRead;
        string pairedReadType;
        string libraryName;
        bool pairedRead = Helper.ValidatePairedSequenceId(
            sequenceId,
            out originalSequenceId,
            out forwardRead,
            out pairedReadType,
            out libraryName);

        // The ID does not describe a paired read: pass the deltas through untouched.
        if (!pairedRead)
        {
            foreach (var delta in curReadDeltas)
            {
                yield return delta;
            }

            continue;
        }

        string pairedReadId = Helper.GetPairedReadId(
            originalSequenceId,
            Helper.GetMatePairedReadType(pairedReadType),
            libraryName);

        // Find mate pair
        List<DeltaAlignment> mateDeltas = alignmentBetweenReferenceAndReads.GetDeltaAlignmentFor(pairedReadId);

        // No mate deltas (a null result is treated like an empty one): pass through untouched.
        if (mateDeltas == null || mateDeltas.Count == 0)
        {
            foreach (var delta in curReadDeltas)
            {
                yield return delta;
            }

            continue;
        }

        // Resolve using distance method; a null result emits nothing for this read.
        List<DeltaAlignment> resolvedDeltas = ResolveRepeatUsingMatePair(curReadDeltas, mateDeltas, libraryName);
        if (resolvedDeltas != null)
        {
            foreach (var delta in resolvedDeltas)
            {
                yield return delta;
            }
        }
    }
}
/// <summary>
/// Consensus-generation step of the assembler; delegates to
/// ConsensusGeneration.GenerateConsensus.
/// </summary>
/// <param name="deltaAlignmentCollection">Alignment between reference genome and reads.</param>
/// <returns>The sequences returned by ConsensusGeneration.GenerateConsensus.</returns>
private IEnumerable<ISequence> ConsensusGenerator(DeltaAlignmentCollection deltaAlignmentCollection)
{
    IEnumerable<ISequence> contigs = ConsensusGeneration.GenerateConsensus(deltaAlignmentCollection);
    return contigs;
}
/// <summary>
/// Repeat-resolution step of the assembler; delegates to RepeatResolver.ResolveAmbiguity,
/// which relies on mate pair information to place repeated reads.
/// </summary>
/// <param name="deltaAlignmentCollection">Alignment between reference genome and reads.</param>
/// <returns>The deltas returned by RepeatResolver.ResolveAmbiguity.</returns>
private static IEnumerable<DeltaAlignment> RepeatResolution(DeltaAlignmentCollection deltaAlignmentCollection)
{
    IEnumerable<DeltaAlignment> resolvedDeltas = RepeatResolver.ResolveAmbiguity(deltaAlignmentCollection);
    return resolvedDeltas;
}
/// <summary>
/// Layout-refinement step of the assembler; delegates to LayoutRefiner.RefineLayout,
/// which accounts for indels and rearrangements.
/// </summary>
/// <param name="deltaAlignmentCollection">Ordered, repeat-resolved deltas.</param>
/// <returns>The deltas returned by LayoutRefiner.RefineLayout.</returns>
private static IEnumerable<DeltaAlignment> LayoutRefinment(DeltaAlignmentCollection deltaAlignmentCollection)
{
    IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefiner.RefineLayout(deltaAlignmentCollection);
    return refinedDeltas;
}
/// <summary>
/// Assemble the input sequences into the largest possible contigs.
/// Intermediate results of each step are written to temporary files, which are deleted
/// before the method returns or throws.
/// </summary>
/// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
/// <param name="queryParser">The parser to load the sequences to assemble.</param>
/// <returns>
/// The scaffolds when ScaffoldingEnabled is set; otherwise the list of contigs.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
/// </exception>
public IEnumerable<ISequence> Assemble(IEnumerable<ISequence> referenceSequence, FastASequencePositionParser queryParser)
{
    // Validate before any state is touched, so that a rejected call does not leave a
    // freshly subscribed timer behind on the instance.
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    if (queryParser == null)
    {
        throw new ArgumentNullException("queryParser");
    }

    this._progressTimer = new Timer(ProgressTimerInterval);
    this._progressTimer.Elapsed += this.ProgressTimerElapsed;

    string readAlignmentOutputFilename = null;
    string unsortedRepeatResolutionOutputFilename = null;
    string repeateResolutionOutputFilename = null;
    string unsortedLayoutRefinmentOutputFilename = null;
    string layoutRefinmentOutputFileName = null;

    try
    {
        // Converting to list to avoid multiple parse of the reference file if its a yield return
        var refSequences = referenceSequence.ToList();

        // CacheSequencesForRandomAccess will ignore the call if called more than once.
        queryParser.CacheSequencesForRandomAccess();
        IEnumerable<ISequence> reads = queryParser.Parse();

        // Comparative Assembly Steps
        // 1) Read Alignment (Calling NUCmer for aligning reads to reference sequence)
        this.StatusEventStart(Properties.Resources.ReadAlignmentStarted);
        IEnumerable<DeltaAlignment> alignmentBetweenReferenceAndReads =
            this.ReadAlignment(refSequences, reads.Where(a => a.Count >= this.LengthOfMum));
        readAlignmentOutputFilename = Path.GetTempFileName();
        WriteDelta(alignmentBetweenReferenceAndReads, readAlignmentOutputFilename);
        this.StatusEventEnd(Properties.Resources.ReadAlignmentEnded);

        // 2) Repeat Resolution
        this.StatusEventStart(Properties.Resources.RepeatResolutionStarted);
        DeltaAlignmentSorter sorter;
        unsortedRepeatResolutionOutputFilename = Path.GetTempFileName();
        using (DeltaAlignmentCollection deltaAlignmentFromReadAlignment =
            new DeltaAlignmentCollection(readAlignmentOutputFilename, queryParser))
        {
            IEnumerable<DeltaAlignment> repeatResolvedDeltas = RepeatResolution(deltaAlignmentFromReadAlignment);

            // Only the first reference sequence's length is passed to the sorter.
            sorter = new DeltaAlignmentSorter(refSequences[0].Count);
            WriteUnsortedDelta(repeatResolvedDeltas, sorter, unsortedRepeatResolutionOutputFilename);
        }

        this.StatusEventEnd(Properties.Resources.RepeatResolutionEnded);

        this.StatusEventStart(Properties.Resources.SortingResolvedDeltasStarted);
        repeateResolutionOutputFilename = Path.GetTempFileName();
        WriteSortedDelta(sorter, unsortedRepeatResolutionOutputFilename, queryParser, repeateResolutionOutputFilename);
        this.StatusEventEnd(Properties.Resources.SortingResolvedDeltasEnded);

        // 3) Layout Refinement
        this.StatusEventStart(Properties.Resources.LayoutRefinementStarted);
        layoutRefinmentOutputFileName = Path.GetTempFileName();
        using (DeltaAlignmentCollection unsortedDeltaCollectionForLayoutRefinment =
            new DeltaAlignmentCollection(repeateResolutionOutputFilename, queryParser))
        {
            unsortedLayoutRefinmentOutputFilename = Path.GetTempFileName();
            IEnumerable<DeltaAlignment> layoutRefinedDeltas = LayoutRefinment(unsortedDeltaCollectionForLayoutRefinment);
            sorter = new DeltaAlignmentSorter(refSequences[0].Count);
            WriteUnsortedDelta(layoutRefinedDeltas, sorter, unsortedLayoutRefinmentOutputFilename);
            WriteSortedDelta(sorter, unsortedLayoutRefinmentOutputFilename, queryParser, layoutRefinmentOutputFileName);
        }

        this.StatusEventEnd(Properties.Resources.LayoutRefinementEnded);

        // 4) Consensus Generation
        this.StatusEventStart(Properties.Resources.ConsensusGenerationStarted);
        IList<ISequence> contigs;
        using (var delta = new DeltaAlignmentCollection(layoutRefinmentOutputFileName, queryParser))
        {
            // Materialized inside the using block, while the collection is still open.
            contigs = this.ConsensusGenerator(delta).ToList();
        }

        this.StatusEventEnd(Properties.Resources.ConsensusGenerationEnded);

        if (this.ScaffoldingEnabled)
        {
            // 5) Scaffold Generation
            this.StatusEventStart(Properties.Resources.ScaffoldGenerationStarted);
            IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
            this.StatusEventEnd(Properties.Resources.ScaffoldGenerationEnded);
            return scaffolds;
        }
        else
        {
            return contigs;
        }
    }
    finally
    {
        this._progressTimer.Stop();

        // Cleanup temp files.
        if (!string.IsNullOrEmpty(readAlignmentOutputFilename))
        {
            File.Delete(readAlignmentOutputFilename);
        }

        if (!string.IsNullOrEmpty(unsortedRepeatResolutionOutputFilename))
        {
            File.Delete(unsortedRepeatResolutionOutputFilename);
        }

        if (!string.IsNullOrEmpty(repeateResolutionOutputFilename))
        {
            File.Delete(repeateResolutionOutputFilename);
        }

        if (!string.IsNullOrEmpty(unsortedLayoutRefinmentOutputFilename))
        {
            File.Delete(unsortedLayoutRefinmentOutputFilename);
        }

        if (!string.IsNullOrEmpty(layoutRefinmentOutputFileName))
        {
            File.Delete(layoutRefinmentOutputFileName);
        }
    }
}
/// <summary>
/// Assembles the query reads against the reference into contigs, or into scaffolds
/// when ScaffoldingEnabled is set. Each step's intermediate output is held in a
/// temporary stream from PlatformManager.Services; all of them are disposed on exit.
/// </summary>
/// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
/// <param name="queryParser">The parser to load the sequences to assemble.</param>
/// <returns>The scaffolds when ScaffoldingEnabled is set; otherwise the list of contigs.</returns>
public IEnumerable<ISequence> Assemble(IEnumerable<ISequence> referenceSequence, FastASequencePositionParser queryParser)
{
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    if (queryParser == null)
    {
        throw new ArgumentNullException("queryParser");
    }

    Stream alignedStream = null;
    Stream unsortedResolvedStream = null;
    Stream sortedResolvedStream = null;
    Stream unsortedRefinedStream = null;
    Stream sortedRefinedStream = null;

    try
    {
        // Materialize once so a lazily parsed reference is not parsed repeatedly.
        List<ISequence> references = referenceSequence.ToList();

        // A repeated call to CacheSequencesForRandomAccess is ignored by the parser.
        queryParser.CacheSequencesForRandomAccess();
        IEnumerable<ISequence> reads = queryParser.Parse();

        // Step 1 - read alignment (NUCmer), restricted to reads at least LengthOfMum long.
        this.StatusEventStart(Properties.Resource.ReadAlignmentStarted);
        IEnumerable<ISequence> longEnoughReads = reads.Where(a => a.Count >= this.LengthOfMum);
        IEnumerable<DeltaAlignment> alignedDeltas = this.ReadAlignment(references, longEnoughReads);
        alignedStream = PlatformManager.Services.CreateTempStream();
        WriteDelta(alignedDeltas, alignedStream);
        this.StatusEventEnd(Properties.Resource.ReadAlignmentEnded);

        // Step 2 - repeat resolution.
        this.StatusEventStart(Properties.Resource.RepeatResolutionStarted);
        DeltaAlignmentSorter sorter;
        unsortedResolvedStream = PlatformManager.Services.CreateTempStream();
        using (var alignedCollection = new DeltaAlignmentCollection(alignedStream, queryParser))
        {
            IEnumerable<DeltaAlignment> resolvedDeltas = RepeatResolution(alignedCollection);
            sorter = new DeltaAlignmentSorter(references[0].Count);
            WriteUnsortedDelta(resolvedDeltas, sorter, unsortedResolvedStream);
        }

        this.StatusEventEnd(Properties.Resource.RepeatResolutionEnded);

        // Step 2b - sort the resolved deltas.
        this.StatusEventStart(Properties.Resource.SortingResolvedDeltasStarted);
        sortedResolvedStream = PlatformManager.Services.CreateTempStream();
        WriteSortedDelta(sorter, unsortedResolvedStream, queryParser, sortedResolvedStream);
        this.StatusEventEnd(Properties.Resource.SortingResolvedDeltasEnded);

        // Step 3 - layout refinement, followed by another sort.
        this.StatusEventStart(Properties.Resource.LayoutRefinementStarted);
        sortedRefinedStream = PlatformManager.Services.CreateTempStream();
        using (var resolvedCollection = new DeltaAlignmentCollection(sortedResolvedStream, queryParser))
        {
            unsortedRefinedStream = PlatformManager.Services.CreateTempStream();
            IEnumerable<DeltaAlignment> refinedDeltas = LayoutRefinment(resolvedCollection);
            sorter = new DeltaAlignmentSorter(references[0].Count);
            WriteUnsortedDelta(refinedDeltas, sorter, unsortedRefinedStream);
            WriteSortedDelta(sorter, unsortedRefinedStream, queryParser, sortedRefinedStream);
        }

        this.StatusEventEnd(Properties.Resource.LayoutRefinementEnded);

        // Step 4 - consensus generation, materialized while the collection is open.
        this.StatusEventStart(Properties.Resource.ConsensusGenerationStarted);
        IList<ISequence> contigs;
        using (var refinedCollection = new DeltaAlignmentCollection(sortedRefinedStream, queryParser))
        {
            contigs = this.ConsensusGenerator(refinedCollection).ToList();
        }

        this.StatusEventEnd(Properties.Resource.ConsensusGenerationEnded);

        if (!this.ScaffoldingEnabled)
        {
            return contigs;
        }

        // Step 5 - scaffold generation.
        this.StatusEventStart(Properties.Resource.ScaffoldGenerationStarted);
        IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
        this.StatusEventEnd(Properties.Resource.ScaffoldGenerationEnded);
        return scaffolds;
    }
    finally
    {
        // Release the temporary streams in creation-independent, fixed order.
        Stream[] tempStreams =
        {
            alignedStream,
            unsortedResolvedStream,
            sortedResolvedStream,
            unsortedRefinedStream,
            sortedRefinedStream
        };

        foreach (Stream tempStream in tempStreams)
        {
            if (tempStream != null)
            {
                tempStream.Dispose();
            }
        }
    }
}