/// <summary>
/// Runs layout refinement over a delta alignment file and writes the sorted result.
/// </summary>
/// <remarks>
/// FilePath[1] is opened as the query FastA file and FilePath[0] as the delta
/// alignment file. When Verbose is set, file sizes and stage timings are reported
/// through Output at the Verbose level.
/// </remarks>
public void RefineLayout()
{
    TimeSpan computeTime = TimeSpan.Zero;
    Stopwatch watch = Stopwatch.StartNew();

    // Stage 1: load the query reads. The parser caches the sequences before
    // the underlying stream is closed.
    string queryPath = this.FilePath[1];
    long fileSize = new FileInfo(queryPath).Length;
    FastASequencePositionParser queryParser;
    using (FileStream queryStream = File.OpenRead(queryPath))
    {
        queryParser = new FastASequencePositionParser(queryStream, true);
        queryParser.CacheSequencesForRandomAccess();
    }

    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed Query FastA file: {0}", Path.GetFullPath(queryPath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", watch.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    // Stage 2: open the delta alignments, refine them, then write them sorted.
    string deltaPath = this.FilePath[0];
    fileSize = new FileInfo(deltaPath).Length;

    watch.Restart();
    using (FileStream deltaStream = File.OpenRead(deltaPath))
    using (DeltaAlignmentCollection deltas = new DeltaAlignmentCollection(deltaStream, queryParser))
    {
        watch.Stop();

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Processed DeltaAlignment file: {0}", Path.GetFullPath(deltaPath));
            Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", watch.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
        }

        // Compute phase: refinement plus the unsorted write.
        watch.Restart();
        IEnumerable<DeltaAlignment> refined = LayoutRefiner.RefineLayout(deltas);
        DeltaAlignmentSorter deltaSorter = new DeltaAlignmentSorter();
        WriteDelta(refined, deltaSorter, UnsortedLayoutRefinmentOutputFilename);
        watch.Stop();
        computeTime = watch.Elapsed;

        // Write phase: the sorted output; its duration stays on the stopwatch.
        watch.Restart();
        WriteSortedDelta(deltaSorter, UnsortedLayoutRefinmentOutputFilename, queryParser, this.OutputFile);
        watch.Stop();
    }

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", computeTime);
        Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", watch.Elapsed);
    }
}
/// <summary>
/// Assembles the reads in FilePath[1] against the reference in FilePath[0]
/// and writes the resulting contigs.
/// </summary>
/// <remarks>
/// Exactly two input paths are required; otherwise an error is printed to
/// standard error and the process exits with code -1. Timing details go to
/// standard error when Verbose is set.
/// </remarks>
public virtual void AssembleSequences()
{
    if (this.FilePath.Length != 2)
    {
        Console.Error.WriteLine("\nError: A reference file and 1 query file are required.");
        Environment.Exit(-1);
    }

    string referencePath = this.FilePath[0];
    string readsPath = this.FilePath[1];

    TimeSpan assembleTime = new TimeSpan();
    Stopwatch watch = new Stopwatch();
    long fileSize = new FileInfo(referencePath).Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    // Reference sequences.
    watch.Restart();
    IEnumerable<ISequence> referenceSequences = new FastAParser(referencePath).Parse();
    watch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed reference file: {0}", Path.GetFullPath(referencePath));
        Console.Error.WriteLine(" Read/Processing time: {0}", watch.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    // Query reads.
    fileSize = new FileInfo(readsPath).Length;

    watch.Restart();
    FastASequencePositionParser queryParser = new FastASequencePositionParser(readsPath, true);
    queryParser.CacheSequencesForRandomAccess();
    IEnumerable<ISequence> reads = queryParser.Parse();
    watch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed reads file: {0}", Path.GetFullPath(readsPath));
        Console.Error.WriteLine(" Read/Processing time: {0}", watch.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    // Read validation.
    watch.Restart();
    ValidateAmbiguousReads(reads);
    watch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Time taken for Validating reads: {0}", watch.Elapsed);
    }

    // Assembly.
    watch.Restart();
    ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
    assembler.StatusChanged += this.AssemblerStatusChanged;
    assembler.ScaffoldingEnabled = this.Scaffold;
    assembler.KmerLength = this.KmerLength;
    assembler.LengthOfMum = this.MumLength;
    IEnumerable<ISequence> assembled = assembler.Assemble(referenceSequences, queryParser);
    watch.Stop();
    assembleTime = assembleTime.Add(watch.Elapsed);

    // Output.
    watch.Restart();
    if (this.OutputFile != null)
    {
        // Write output to the specified file.
        this.WriteContigs(assembled, null);
        Console.WriteLine(Resources.OutPutWrittenToFileSpecified);
    }
    else
    {
        // Write output to console.
        this.WriteContigs(assembled, Console.Out);
    }

    watch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine(" Assemble time: {0}", assembleTime);
        Console.Error.WriteLine(" Write() time: {0}", watch.Elapsed);
    }
}
/// <summary>
/// Assembles the reads in FilePath[1] against the reference in FilePath[0]
/// and writes the resulting contigs.
/// </summary>
/// <remarks>
/// Exactly two input paths are required; otherwise an error is reported
/// through Output and the method returns without doing any work. Timing
/// details are reported at the Verbose level when Verbose is set.
/// </remarks>
public virtual void AssembleSequences()
{
    if (this.FilePath.Length != 2)
    {
        Output.WriteLine(OutputLevel.Error, "Error: A reference file and 1 query file are required.");
        return;
    }

    string referencePath = this.FilePath[0];
    string readsPath = this.FilePath[1];

    TimeSpan assembleTime = new TimeSpan();
    Stopwatch watch = new Stopwatch();
    long fileSize = new FileInfo(referencePath).Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    // Reference sequences.
    watch.Restart();
    IEnumerable<ISequence> referenceSequences = ParseFile(referencePath);
    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reference file: {0}", Path.GetFullPath(referencePath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time : {0}", watch.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    // Query reads.
    fileSize = new FileInfo(readsPath).Length;

    watch.Restart();
    FastASequencePositionParser queryParser = new FastASequencePositionParser(readsPath, true);
    queryParser.CacheSequencesForRandomAccess();
    IEnumerable<ISequence> reads = queryParser.Parse();
    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed reads file : {0}", Path.GetFullPath(readsPath));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", watch.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", fileSize);
    }

    // Read validation.
    watch.Restart();
    ValidateAmbiguousReads(reads);
    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", watch.Elapsed);
        Output.WriteLine(OutputLevel.Verbose);
    }

    // Assembly.
    watch.Restart();
    ComparativeGenomeAssembler assembler = new ComparativeGenomeAssembler();
    assembler.StatusChanged += this.AssemblerStatusChanged;
    assembler.ScaffoldingEnabled = this.Scaffold;
    assembler.KmerLength = this.KmerLength;
    assembler.LengthOfMum = this.MumLength;
    IEnumerable<ISequence> assembled = assembler.Assemble(referenceSequences, queryParser);
    watch.Stop();
    assembleTime = assembleTime.Add(watch.Elapsed);

    // Output.
    watch.Restart();
    this.WriteContigs(assembled);
    watch.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose, "Assemble time: {0}", assembleTime);
        Output.WriteLine(OutputLevel.Verbose, "Write time: {0}", watch.Elapsed);
    }
}
/// <summary>
/// Assemble the input sequences into the largest possible contigs.
/// </summary>
/// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
/// <param name="queryParser">The parser to load the sequences to assemble.</param>
/// <returns>
/// The assembled sequences: scaffolds when ScaffoldingEnabled is true,
/// otherwise the contigs produced by consensus generation.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
/// </exception>
public IEnumerable<ISequence> Assemble(IEnumerable<ISequence> referenceSequence, FastASequencePositionParser queryParser)
{
    // Validate arguments before touching any instance state, so a rejected
    // call neither replaces the progress timer nor reports a misleading
    // parameter name from inside ToList().
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    if (queryParser == null)
    {
        throw new ArgumentNullException("queryParser");
    }

    this._progressTimer = new Timer(ProgressTimerInterval);
    this._progressTimer.Elapsed += this.ProgressTimerElapsed;

    // Intermediate results are passed between stages through temp files;
    // each name is recorded as soon as the file exists so the finally block
    // can remove it even when a later stage fails.
    string readAlignmentOutputFilename = null;
    string unsortedRepeatResolutionOutputFilename = null;
    string repeateResolutionOutputFilename = null;
    string unsortedLayoutRefinmentOutputFilename = null;
    string layoutRefinmentOutputFileName = null;

    try
    {
        // Converting to list to avoid multiple parse of the reference file if its a yield return
        var refSequences = referenceSequence.ToList();

        // CacheSequencesForRandomAccess will ignore the call if called more than once.
        queryParser.CacheSequencesForRandomAccess();
        IEnumerable<ISequence> reads = queryParser.Parse();

        // Comparative Assembly Steps
        // 1) Read Alignment (Calling NUCmer for aligning reads to reference sequence)
        this.StatusEventStart(Properties.Resources.ReadAlignmentStarted);
        IEnumerable<DeltaAlignment> alignmentBetweenReferenceAndReads =
            this.ReadAlignment(refSequences, reads.Where(a => a.Count >= this.LengthOfMum));
        readAlignmentOutputFilename = Path.GetTempFileName();
        WriteDelta(alignmentBetweenReferenceAndReads, readAlignmentOutputFilename);
        this.StatusEventEnd(Properties.Resources.ReadAlignmentEnded);

        // 2) Repeat Resolution
        this.StatusEventStart(Properties.Resources.RepeatResolutionStarted);
        DeltaAlignmentSorter sorter;
        unsortedRepeatResolutionOutputFilename = Path.GetTempFileName();
        using (DeltaAlignmentCollection deltaAlignmentFromReadAlignment =
            new DeltaAlignmentCollection(readAlignmentOutputFilename, queryParser))
        {
            IEnumerable<DeltaAlignment> repeatResolvedDeltas = RepeatResolution(deltaAlignmentFromReadAlignment);
            sorter = new DeltaAlignmentSorter(refSequences[0].Count);
            WriteUnsortedDelta(repeatResolvedDeltas, sorter, unsortedRepeatResolutionOutputFilename);
        }

        this.StatusEventEnd(Properties.Resources.RepeatResolutionEnded);

        this.StatusEventStart(Properties.Resources.SortingResolvedDeltasStarted);
        repeateResolutionOutputFilename = Path.GetTempFileName();
        WriteSortedDelta(sorter, unsortedRepeatResolutionOutputFilename, queryParser, repeateResolutionOutputFilename);
        this.StatusEventEnd(Properties.Resources.SortingResolvedDeltasEnded);

        // 3) Layout Refinement
        this.StatusEventStart(Properties.Resources.LayoutRefinementStarted);
        layoutRefinmentOutputFileName = Path.GetTempFileName();
        using (DeltaAlignmentCollection unsortedDeltaCollectionForLayoutRefinment =
            new DeltaAlignmentCollection(repeateResolutionOutputFilename, queryParser))
        {
            unsortedLayoutRefinmentOutputFilename = Path.GetTempFileName();
            IEnumerable<DeltaAlignment> layoutRefinedDeltas = LayoutRefinment(unsortedDeltaCollectionForLayoutRefinment);
            sorter = new DeltaAlignmentSorter(refSequences[0].Count);
            WriteUnsortedDelta(layoutRefinedDeltas, sorter, unsortedLayoutRefinmentOutputFilename);
            WriteSortedDelta(sorter, unsortedLayoutRefinmentOutputFilename, queryParser, layoutRefinmentOutputFileName);
        }

        this.StatusEventEnd(Properties.Resources.LayoutRefinementEnded);

        // 4) Consensus Generation
        this.StatusEventStart(Properties.Resources.ConsensusGenerationStarted);
        IList<ISequence> contigs;
        using (var delta = new DeltaAlignmentCollection(layoutRefinmentOutputFileName, queryParser))
        {
            // Materialize while the collection is still open.
            contigs = this.ConsensusGenerator(delta).ToList();
        }

        this.StatusEventEnd(Properties.Resources.ConsensusGenerationEnded);

        if (this.ScaffoldingEnabled)
        {
            // 5) Scaffold Generation
            this.StatusEventStart(Properties.Resources.ScaffoldGenerationStarted);
            IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
            this.StatusEventEnd(Properties.Resources.ScaffoldGenerationEnded);
            return scaffolds;
        }
        else
        {
            return contigs;
        }
    }
    finally
    {
        this._progressTimer.Stop();

        // Cleanup temp files.
        if (!string.IsNullOrEmpty(readAlignmentOutputFilename))
        {
            File.Delete(readAlignmentOutputFilename);
        }

        if (!string.IsNullOrEmpty(unsortedRepeatResolutionOutputFilename))
        {
            File.Delete(unsortedRepeatResolutionOutputFilename);
        }

        if (!string.IsNullOrEmpty(repeateResolutionOutputFilename))
        {
            File.Delete(repeateResolutionOutputFilename);
        }

        if (!string.IsNullOrEmpty(unsortedLayoutRefinmentOutputFilename))
        {
            File.Delete(unsortedLayoutRefinmentOutputFilename);
        }

        if (!string.IsNullOrEmpty(layoutRefinmentOutputFileName))
        {
            File.Delete(layoutRefinmentOutputFileName);
        }
    }
}
/// <summary>
/// Assemble the input sequences into the largest possible contigs.
/// </summary>
/// <param name="referenceSequence">The sequence used as backbone for assembly.</param>
/// <param name="queryParser">The parser to load the sequences to assemble.</param>
/// <returns>
/// The assembled sequences: scaffolds when ScaffoldingEnabled is true,
/// otherwise the contigs produced by consensus generation.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="referenceSequence"/> or <paramref name="queryParser"/> is null.
/// </exception>
public IEnumerable<ISequence> Assemble(IEnumerable<ISequence> referenceSequence, FastASequencePositionParser queryParser)
{
    if (referenceSequence == null)
    {
        throw new ArgumentNullException("referenceSequence");
    }

    if (queryParser == null)
    {
        throw new ArgumentNullException("queryParser");
    }

    // One temp stream per intermediate result; each stays null until its
    // stage creates it, and all non-null ones are disposed on exit.
    Stream alignedReads = null;
    Stream unsortedResolved = null;
    Stream sortedResolved = null;
    Stream unsortedRefined = null;
    Stream sortedRefined = null;

    try
    {
        // Materialize the reference once so a lazily-parsed source is not re-read.
        List<ISequence> references = referenceSequence.ToList();

        // CacheSequencesForRandomAccess will ignore the call if called more than once.
        queryParser.CacheSequencesForRandomAccess();
        IEnumerable<ISequence> reads = queryParser.Parse();

        // Step 1: read alignment (NUCmer aligns the reads to the reference).
        this.StatusEventStart(Properties.Resource.ReadAlignmentStarted);
        IEnumerable<ISequence> usableReads = reads.Where(a => a.Count >= this.LengthOfMum);
        IEnumerable<DeltaAlignment> readDeltas = this.ReadAlignment(references, usableReads);
        alignedReads = PlatformManager.Services.CreateTempStream();
        WriteDelta(readDeltas, alignedReads);
        this.StatusEventEnd(Properties.Resource.ReadAlignmentEnded);

        // Step 2: repeat resolution, followed by sorting of the resolved deltas.
        this.StatusEventStart(Properties.Resource.RepeatResolutionStarted);
        DeltaAlignmentSorter deltaSorter;
        unsortedResolved = PlatformManager.Services.CreateTempStream();
        using (var alignedCollection = new DeltaAlignmentCollection(alignedReads, queryParser))
        {
            IEnumerable<DeltaAlignment> resolved = RepeatResolution(alignedCollection);
            deltaSorter = new DeltaAlignmentSorter(references[0].Count);
            WriteUnsortedDelta(resolved, deltaSorter, unsortedResolved);
        }

        this.StatusEventEnd(Properties.Resource.RepeatResolutionEnded);

        this.StatusEventStart(Properties.Resource.SortingResolvedDeltasStarted);
        sortedResolved = PlatformManager.Services.CreateTempStream();
        WriteSortedDelta(deltaSorter, unsortedResolved, queryParser, sortedResolved);
        this.StatusEventEnd(Properties.Resource.SortingResolvedDeltasEnded);

        // Step 3: layout refinement.
        this.StatusEventStart(Properties.Resource.LayoutRefinementStarted);
        sortedRefined = PlatformManager.Services.CreateTempStream();
        using (var resolvedCollection = new DeltaAlignmentCollection(sortedResolved, queryParser))
        {
            unsortedRefined = PlatformManager.Services.CreateTempStream();
            IEnumerable<DeltaAlignment> refined = LayoutRefinment(resolvedCollection);
            deltaSorter = new DeltaAlignmentSorter(references[0].Count);
            WriteUnsortedDelta(refined, deltaSorter, unsortedRefined);
            WriteSortedDelta(deltaSorter, unsortedRefined, queryParser, sortedRefined);
        }

        this.StatusEventEnd(Properties.Resource.LayoutRefinementEnded);

        // Step 4: consensus generation.
        this.StatusEventStart(Properties.Resource.ConsensusGenerationStarted);
        IList<ISequence> contigs;
        using (var refinedCollection = new DeltaAlignmentCollection(sortedRefined, queryParser))
        {
            // Materialize while the collection is still open.
            contigs = this.ConsensusGenerator(refinedCollection).ToList();
        }

        this.StatusEventEnd(Properties.Resource.ConsensusGenerationEnded);

        if (!this.ScaffoldingEnabled)
        {
            return contigs;
        }

        // Step 5: scaffold generation.
        this.StatusEventStart(Properties.Resource.ScaffoldGenerationStarted);
        IEnumerable<ISequence> scaffolds = this.ScaffoldsGenerator(contigs, reads);
        this.StatusEventEnd(Properties.Resource.ScaffoldGenerationEnded);
        return scaffolds;
    }
    finally
    {
        // Release the temp streams in the same order the stages are listed above.
        Stream[] tempStreams =
        {
            alignedReads,
            unsortedResolved,
            sortedResolved,
            unsortedRefined,
            sortedRefined
        };

        foreach (Stream tempStream in tempStreams)
        {
            if (tempStream != null)
            {
                tempStream.Dispose();
            }
        }
    }
}