/// <summary>
/// Runs the assembly pipeline over the supplied reads: graph construction,
/// threshold estimation, dangling-link removal, redundant-path removal, a
/// second dangling-link pass and finally contig building. The Started/Ended
/// hook of each stage is invoked around the stage itself.
/// </summary>
/// <param name="inputSequences">Sequence reads to assemble.</param>
/// <returns>A <see cref="PadenaAssembly"/> holding the contigs that were built.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="inputSequences"/> is null.
/// </exception>
public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (inputSequences == null)
    {
        throw new ArgumentNullException("inputSequences");
    }

    sequenceReads = inputSequences;

    // Drop ambiguous reads and prepare the assembler's working fields.
    Initialize();

    // Steps 1 and 2: derive k-mers from the reads and build the de Bruijn graph.
    CreateGraphStarted();
    CreateGraph();
    CreateGraphEnded();

    // Work out default erosion and coverage thresholds.
    EstimateDefaultValuesStarted();
    EstimateDefaultThresholds();
    EstimateDefaultValuesEnded();

    // Step 3: prune dangling links.
    UndangleGraphStarted();
    UnDangleGraph();
    UndangleGraphEnded();

    // Step 4: prune redundant paths.
    RemoveRedundancyStarted();
    RemoveRedundancy();
    RemoveRedundancyEnded();

    // Second dangling-link pass: cleans up links that the redundant-path
    // purger may have produced. Status is raised manually for this pass.
    DateTime secondPassStart = DateTime.Now;
    statusMessage = string.Format(
        CultureInfo.CurrentCulture,
        "\n UndangleGraph - Start time: {0}",
        secondPassStart);
    RaiseStatusEvent();

    UnDangleGraph();

    DateTime secondPassEnd = DateTime.Now;
    statusMessage = string.Format(
        CultureInfo.CurrentCulture,
        "\n UndangleGraph - End time: {0}",
        secondPassEnd);
    RaiseStatusEvent();

    // Step 5: build the contigs.
    BuildContigsStarted();
    IEnumerable<ISequence> contigs = BuildContigs();
    BuildContigsEnded();

    PadenaAssembly assembly = new PadenaAssembly();
    assembly.AddContigs(contigs);
    return assembly;
}
/// <summary>
/// Assembles the supplied reads via the single-argument overload and, when
/// requested, additionally runs the scaffold building step on the contigs.
/// </summary>
/// <param name="inputSequences">Sequence reads to assemble.</param>
/// <param name="includeScaffolds">True to run the scaffold building step after contigs are built.</param>
/// <returns>The assembly, with scaffolds attached when they were requested and produced.</returns>
public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences, bool includeScaffolds)
{
    PadenaAssembly assembly = (PadenaAssembly)Assemble(inputSequences);

    if (!includeScaffolds)
    {
        return assembly;
    }

    // Step 6: build scaffolds from the contigs; a null result is simply not attached.
    IList<ISequence> builtScaffolds = BuildScaffolds(assembly.ContigSequences);
    if (builtScaffolds != null)
    {
        assembly.AddScaffolds(builtScaffolds);
    }

    return assembly;
}
/// <summary>
/// Runs the assembly pipeline over the supplied reads: graph construction,
/// threshold estimation, dangling-link removal, redundant-path removal, a
/// second dangling-link pass and contig building.
/// </summary>
/// <param name="inputSequences">Sequence reads to assemble.</param>
/// <returns>A <see cref="PadenaAssembly"/> holding the contigs that were built.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="inputSequences"/> is null.
/// </exception>
public IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (inputSequences == null)
    {
        throw new ArgumentNullException("inputSequences");
    }

    sequenceReads = inputSequences;

    // Drop ambiguous reads and prepare the assembler's working fields.
    Initialize();

    // Steps 1 and 2: derive k-mers from the reads and build the de Bruijn graph.
    CreateGraph();

    // Work out default erosion and coverage thresholds.
    EstimateDefaultThresholds();

    // Step 3: prune dangling links.
    UnDangleGraph();

    // Step 4: prune redundant paths.
    RemoveRedundancy();

    // Second dangling-link pass, cleaning up after the redundant-path purger.
    UnDangleGraph();

    // Step 5: build the contigs and wrap them in the assembly result.
    IEnumerable<ISequence> contigs = BuildContigs();

    PadenaAssembly assembly = new PadenaAssembly();
    assembly.AddContigs(contigs);
    return assembly;
}
/// <summary>
/// Runs the assembly pipeline over the supplied reads, timing each stage
/// with a stopwatch and reporting the elapsed time (and, after the stages
/// that alter the graph, the node count).
/// </summary>
/// <param name="inputSequences">Sequence reads to assemble.</param>
/// <returns>A <see cref="PadenaAssembly"/> holding the contigs that were built.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="inputSequences"/> is null.
/// </exception>
public virtual IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (inputSequences == null)
    {
        throw new ArgumentNullException("inputSequences");
    }

    _sequenceReads = inputSequences;

    // Drop ambiguous reads and prepare the assembler's working fields.
    Initialize();

    // Steps 1 and 2: derive k-mers from the reads and build the de Bruijn graph.
    Stopwatch graphTimer = Stopwatch.StartNew();
    CreateGraphStarted();
    CreateGraph();
    graphTimer.Stop();
    CreateGraphEnded();
    TaskTimeSpanReport(graphTimer.Elapsed);
    NodeCountReport();

    // Work out default erosion and coverage thresholds.
    Stopwatch thresholdTimer = Stopwatch.StartNew();
    EstimateDefaultValuesStarted();
    EstimateDefaultThresholds();
    thresholdTimer.Stop();
    EstimateDefaultValuesEnded();
    TaskTimeSpanReport(thresholdTimer.Elapsed);

    // Step 3: prune dangling links.
    Stopwatch undangleTimer = Stopwatch.StartNew();
    UndangleGraphStarted();
    UnDangleGraph();
    undangleTimer.Stop();
    UndangleGraphEnded();
    TaskTimeSpanReport(undangleTimer.Elapsed);
    NodeCountReport();

    // Step 4: prune redundant paths. This timer deliberately keeps running
    // through the second dangling-link pass below.
    Stopwatch redundancyTimer = Stopwatch.StartNew();
    RemoveRedundancyStarted();
    RemoveRedundancy();
    NodeCountReport();

    // Second dangling-link pass: cleans up links that the redundant-path
    // purger may have produced. Status is raised manually for this pass.
    DateTime secondPassStart = DateTime.Now;
    _statusMessage = string.Format(
        CultureInfo.CurrentCulture,
        Properties.Resource.SecondaryUndangleGraphStarted,
        secondPassStart);
    RaiseStatusEvent();

    UnDangleGraph();

    DateTime secondPassEnd = DateTime.Now;
    _statusMessage = string.Format(
        CultureInfo.CurrentCulture,
        Properties.Resource.SecondaryUndangleGraphEnded,
        secondPassEnd);
    RaiseStatusEvent();

    // The end of step 4 is reported only once the second pass has finished.
    redundancyTimer.Stop();
    RemoveRedundancyEnded();
    TaskTimeSpanReport(redundancyTimer.Elapsed);
    NodeCountReport();

    // Step 5: build the contigs.
    Stopwatch contigTimer = Stopwatch.StartNew();
    BuildContigsStarted();
    IEnumerable<ISequence> contigs = BuildContigs();
    contigTimer.Stop();
    BuildContigsEnded();
    TaskTimeSpanReport(contigTimer.Elapsed);

    PadenaAssembly assembly = new PadenaAssembly();
    assembly.AddContigs(contigs);
    return assembly;
}
/// <summary>
/// Runs the assembly pipeline over the supplied reads, timing each stage
/// and reporting elapsed time and node counts along the way. A cancellation
/// token is handed to <c>ReportIntermediateProgress</c> before the pipeline
/// starts and is cancelled when the pipeline finishes or fails.
/// </summary>
/// <param name="inputSequences">Sequence reads to assemble.</param>
/// <returns>A <see cref="PadenaAssembly"/> holding the contigs that were built.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="inputSequences"/> is null.
/// </exception>
public virtual IDeNovoAssembly Assemble(IEnumerable<ISequence> inputSequences)
{
    if (inputSequences == null)
    {
        throw new ArgumentNullException("inputSequences");
    }

    this.sequenceReads = inputSequences;

    // CancellationTokenSource is IDisposable; the using block releases it
    // after the finally clause below has signalled cancellation.
    using (CancellationTokenSource cts = new CancellationTokenSource())
    {
        // NOTE(review): presumably starts progress reporting that observes
        // the token until it is cancelled - confirm against the helper.
        ReportIntermediateProgress(cts.Token);

        try
        {
            // Remove ambiguous reads and set up fields for assembler process
            Initialize();

            // Step 1, 2: Create k-mers from reads and build de bruijn graph
            Stopwatch sw = Stopwatch.StartNew();
            CreateGraphStarted();
            CreateGraph();
            sw.Stop();
            CreateGraphEnded();
            TaskTimeSpanReport(sw.Elapsed);
            NodeCountReport();

            // Estimate and set default value for erosion and coverage thresholds
            sw = Stopwatch.StartNew();
            EstimateDefaultValuesStarted();
            EstimateDefaultThresholds();
            sw.Stop();
            EstimateDefaultValuesEnded();
            TaskTimeSpanReport(sw.Elapsed);

            // Step 3: Remove dangling links from graph
            sw = Stopwatch.StartNew();
            UndangleGraphStarted();
            UnDangleGraph();
            sw.Stop();
            UndangleGraphEnded();
            TaskTimeSpanReport(sw.Elapsed);
            NodeCountReport();

            // Step 4: Remove redundant paths from graph. The stopwatch keeps
            // running through the second dangling-link pass below.
            sw = Stopwatch.StartNew();
            RemoveRedundancyStarted();
            RemoveRedundancy();
            NodeCountReport();

            // Perform dangling link purger step once more.
            // This is done to remove any links created by redundant paths purger.
            StatusMessage = string.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.SecondaryUndangleGraphStarted,
                DateTime.Now);
            UnDangleGraph();
            StatusMessage = string.Format(
                CultureInfo.CurrentCulture,
                Properties.Resource.SecondaryUndangleGraphEnded,
                DateTime.Now);

            // Report end after undangle
            sw.Stop();
            RemoveRedundancyEnded();
            TaskTimeSpanReport(sw.Elapsed);
            NodeCountReport();

            // Step 5: Build Contigs
            sw = Stopwatch.StartNew();
            BuildContigsStarted();
            IEnumerable<ISequence> contigSequences = BuildContigs();
            sw.Stop();
            BuildContigsEnded();
            TaskTimeSpanReport(sw.Elapsed);

            PadenaAssembly result = new PadenaAssembly();
            result.AddContigs(contigSequences);
            return result;
        }
        finally
        {
            // Cancel first so the token reads as cancelled before the
            // source is disposed on leaving the using block.
            cts.Cancel();
        }
    }
}
/// <summary>
/// Validate ParallelDenovoAssembler class properties: runs the graph, purge,
/// contig and scaffold steps on the reads named by the XML node, then checks
/// the resulting <see cref="PadenaAssembly"/> and the registered clone library.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ParallelDenovoAssemblyProperties(string nodeName)
{
    // Get values from XML node.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string library = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LibraryName);
    string stdDeviation = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.StdDeviation);
    string mean = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.Mean);

    // Parse each numeric setting exactly once and reuse the value below.
    int kmer = int.Parse(kmerLength, (IFormatProvider)null);
    float meanLength = float.Parse(mean, (IFormatProvider)null);
    float deviation = float.Parse(stdDeviation, (IFormatProvider)null);

    // Get the input reads and build kmers
    using (FastAParser parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Build kmers from step1,graph in step2
        // Remove the dangling links from graph in step3
        // Remove bubbles from the graph in step4
        // Pass the graph and build contigs
        // Validate the contigs
        this.KmerLength = kmer;
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        this.CreateGraph();
        this.UnDangleGraph();
        this.RedundantPathsPurger = new RedundantPathsPurger(kmer + 1);
        this.RemoveRedundancy();
        this.ContigBuilder = new SimplePathContigBuilder();

        // Build contigs. Materialize once: the sequence is consumed by the
        // scaffold builder, the assembly and the count assertion, and
        // re-enumerating it could repeat the contig-building work.
        List<ISequence> contigs = this.BuildContigs().ToList();

        CloneLibraryInformation cloneLibInfoObj = new CloneLibraryInformation();
        cloneLibInfoObj.LibraryName = library;
        cloneLibInfoObj.MeanLengthOfInsert = meanLength;
        cloneLibInfoObj.StandardDeviationOfInsert = deviation;

        // Build scaffolds.
        CloneLibrary.Instance.AddLibrary(library, meanLength, deviation);
        IEnumerable<ISequence> scaffolds = BuildScaffolds(contigs);

        PadenaAssembly denovoAssembly = new PadenaAssembly();
        denovoAssembly.AddContigs(contigs);
        denovoAssembly.AddScaffolds(scaffolds);

        // Expected value first, actual second, so failure messages read correctly.
        Assert.AreEqual(contigs.Count, denovoAssembly.ContigSequences.Count());
        Assert.AreEqual(scaffolds.Count(), denovoAssembly.Scaffolds.Count());
        Assert.IsNull(denovoAssembly.Documentation);

        // Validates the Clone Library for the existing clone
        CloneLibraryInformation actualObj = CloneLibrary.Instance.GetLibraryInformation(library);
        Assert.IsTrue(actualObj.Equals(cloneLibInfoObj));
        ApplicationLog.WriteLine("CloneLibraryInformation Equals() is successfully validated");
    }

    // Validate ParallelDenovoAssembler properties.
    ApplicationLog.WriteLine(
        @"Padena BVT : Validated ParallelDenovo Assembly properties");
}