/// <summary>
/// Validates the scaffold sequences produced for the input reads of a test case.
/// Reads the test parameters from the given xml node, builds the graph, removes
/// dangling links and redundant paths, builds contigs, builds scaffolds from the
/// contigs and reads, and compares the scaffolds with the expected sequences.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateScaffoldSequence(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string danglingThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DanglingLinkThresholdNode);
    string redundantThreshold = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RedundantThreshold);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);
    string inputRedundancy = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.InputRedundancy);
    string expectedSeq = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScaffoldSeq);
    string[] scaffoldSeqNodes = expectedSeq.Split(',');

    // The configuration values are machine-readable text, so parse them with the
    // invariant culture. A null provider would fall back to the current culture and
    // could misread values such as "10.5" on machines using a comma decimal separator.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    int kmerLengthValue = Int32.Parse(kmerLength, invariant);
    int danglingThresholdValue = Int32.Parse(danglingThreshold, invariant);
    int redundantThresholdValue = Int32.Parse(redundantThreshold, invariant);
    int redundancyValue = Int32.Parse(inputRedundancy, invariant);
    float meanValue = float.Parse(mean, invariant);
    float stdDeviationValue = float.Parse(stdDeviation, invariant);

    // Get the input reads.
    using (FastAParser parser = new FastAParser(filePath))
    {
        // Materialize the reads once: they are consumed both by the assembler steps
        // and by the scaffold builder, and enumerating the parser result twice would
        // presumably parse the file twice.
        IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();

        // Configure the assembler, then:
        //   create the graph,
        //   remove the dangling links from the graph,
        //   remove redundant paths from the graph,
        //   build contigs from the graph.
        this.KmerLength = kmerLengthValue;
        this.DanglingLinksThreshold = danglingThresholdValue;
        this.DanglingLinksPurger = new DanglingLinksPurger(danglingThresholdValue);
        this.RedundantPathsPurger = new RedundantPathsPurger(redundantThresholdValue);
        this.RedundantPathLengthThreshold = redundantThresholdValue;

        this.SequenceReads.Clear();

        // Hand the assembler its own copy of the list so the scaffold builder below
        // still receives an independent collection.
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();

        // Build contigs.
        this.RemoveRedundancy();
        IEnumerable<ISequence> contigs = this.BuildContigs();

        // Register the clone library (name, mean, standard deviation) before scaffolding.
        CloneLibrary.Instance.AddLibrary(library, meanValue, stdDeviationValue);

        IEnumerable<ISequence> scaffoldSeq;
        using (GraphScaffoldBuilder scaffold = new GraphScaffoldBuilder())
        {
            scaffoldSeq = scaffold.BuildScaffold(
                sequenceReads,
                contigs.ToList(),
                this.KmerLength,
                redundancy: redundancyValue);
        }

        // Compare the generated scaffolds with the expected sequences from the xml node.
        AlignmentHelpers.CompareSequenceLists(new HashSet<string>(scaffoldSeqNodes), scaffoldSeq.ToList());
    }

    ApplicationLog.WriteLine("PADENA P1 : Scaffold sequence : validation for Padena step6:step8 completed successfully");
}
/// <summary>
/// Produces scaffold sequences by passing the paired reads and the contigs to a
/// GraphScaffoldBuilder, together with this instance's KmerLength, Depth and
/// ScaffoldRedundancy settings.
/// </summary>
/// <param name="contigs">List of contigs.</param>
/// <param name="reads">List of paired reads.</param>
/// <returns>List of scaffold sequences.</returns>
private IEnumerable<ISequence> ScaffoldsGenerator(IEnumerable<ISequence> contigs, IEnumerable<ISequence> reads)
{
    using (GraphScaffoldBuilder builder = new GraphScaffoldBuilder())
    {
        // The builder is given a materialized list of the contigs.
        List<ISequence> contigList = contigs.ToList();

        IEnumerable<ISequence> scaffolds = builder.BuildScaffold(
            reads,
            contigList,
            this.KmerLength,
            this.Depth,
            this.ScaffoldRedundancy);

        return scaffolds;
    }
}
/// <summary>
/// Builds scaffolds from a contig file (FilePath[0]) and a reads file (FilePath[1])
/// and passes the result to WriteSequences. When Verbose is set, file details and
/// timings are written to the standard error stream.
/// </summary>
public void GenerateScaffolds()
{
    Stopwatch stopwatch = new Stopwatch();

    // Size of the contig file; only reported in verbose mode.
    long fileSize = new FileInfo(this.FilePath[0]).Length;

    // Register the clone library only when a library name was supplied.
    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    // Contigs.
    stopwatch.Restart();
    FastAParser contigParser = new FastAParser();
    IEnumerable<ISequence> contigs = contigParser.Parse(this.FilePath[0]);
    stopwatch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed Contig file: {0}", Path.GetFullPath(this.FilePath[0]));
        Console.Error.WriteLine(" Read/Processing time: {0}", stopwatch.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    // Reads.
    fileSize = new FileInfo(this.FilePath[1]).Length;

    stopwatch.Restart();
    FastAParser readParser = new FastAParser();
    IEnumerable<ISequence> reads = readParser.Parse(this.FilePath[1]);
    stopwatch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine();
        Console.Error.WriteLine(" Processed reads file: {0}", Path.GetFullPath(this.FilePath[1]));
        Console.Error.WriteLine(" Read/Processing time: {0}", stopwatch.Elapsed);
        Console.Error.WriteLine(" File Size : {0}", fileSize);
    }

    // Scaffold building; this is the only phase reported as compute time.
    stopwatch.Restart();
    IEnumerable<ISequence> scaffolds;
    using (GraphScaffoldBuilder builder = new GraphScaffoldBuilder())
    {
        scaffolds = builder.BuildScaffold(
            reads,
            contigs.ToList(),
            this.KmerLength,
            this.Depth,
            this.Redundancy);
    }

    stopwatch.Stop();
    TimeSpan computeTime = stopwatch.Elapsed;

    // Output.
    stopwatch.Restart();
    this.WriteSequences(scaffolds);
    stopwatch.Stop();

    if (this.Verbose)
    {
        Console.Error.WriteLine(" Compute time: {0}", computeTime);
        Console.Error.WriteLine(" Write() time: {0}", stopwatch.Elapsed);
    }
}
/// <summary>
/// Step 6: Produces scaffolds from the supplied contigs and the stored sequence reads.
/// If no ScaffoldBuilder has been set, a GraphScaffoldBuilder is assigned first.
/// </summary>
/// <param name="contigs">List of contigs.</param>
/// <returns>List of scaffold sequences.</returns>
protected IList<ISequence> BuildScaffolds(IList<ISequence> contigs)
{
    // This step cannot run without a scaffold builder, so fall back to the default one.
    if (this.ScaffoldBuilder == null)
    {
        this.ScaffoldBuilder = new GraphScaffoldBuilder();
    }

    IList<ISequence> scaffolds = this.ScaffoldBuilder.BuildScaffold(
        this.SequenceReads,
        contigs,
        this.KmerLength,
        this.Depth,
        this.ScaffoldRedundancy);

    return scaffolds;
}
/// <summary>
/// Generates scaffolds from a contigs file (FileNames[0]) and a reads file (FileNames[1])
/// and passes them to WriteContigs. Exactly two file names are required; otherwise an
/// error and the scaffold help text are written and the method returns without doing work.
/// In verbose mode the time of every phase and the total runtime are reported.
/// </summary>
public void GenerateScaffold()
{
    Output.WriteLine(OutputLevel.Information, Resources.ScaffoldStarting);

    if (this.FileNames.Length != 2)
    {
        Output.WriteLine(OutputLevel.Error, "\nError: A reference file and 1 query file are required.");
        Output.WriteLine(OutputLevel.Required, Resources.ScaffoldHelp);
        return;
    }

    // Register the clone library only when a library name was supplied.
    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    // Accumulates the elapsed time of every timed phase for the "Total runtime" report.
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();

    using (GraphScaffoldBuilder scaffoldBuilder = new GraphScaffoldBuilder())
    {
        // Contigs file.
        FileInfo refFileinfo = new FileInfo(this.FileNames[0]);
        long refFileLength = refFileinfo.Length;

        runAlgorithm.Restart();
        IEnumerable<ISequence> contigs = AssembleArguments.ParseFile(this.FileNames[0]);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Processed contigs file : {0}", Path.GetFullPath(this.FileNames[0]));
            Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", refFileLength);
            Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
        }

        // Reads file.
        refFileinfo = new FileInfo(this.FileNames[1]);
        refFileLength = refFileinfo.Length;

        runAlgorithm.Restart();
        IEnumerable<ISequence> reads = AssembleArguments.ParseFile(this.FileNames[1]);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Processed reads file : {0}", Path.GetFullPath(this.FileNames[1]));
            Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", refFileLength);
        }

        // Read validation.
        runAlgorithm.Restart();
        ValidateAmbiguousReads(reads);
        runAlgorithm.Stop();

        // Validation is part of the run, so it must count towards the reported total
        // just like every other timed phase.
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
        }

        // Scaffold building.
        runAlgorithm.Restart();
        IList<ISequence> scaffolds = scaffoldBuilder.BuildScaffold(
            reads,
            contigs.ToList(),
            this.KmerLength,
            this.Depth,
            this.Redundancy);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
        }

        // Output.
        runAlgorithm.Restart();
        WriteContigs(scaffolds);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
        }
    }
}