/// <summary>
/// Assemble the list of sequence reads into contigs.
/// The pipeline runs in this order: validate and initialize the reads,
/// build the de Bruijn graph from k-mers, estimate default thresholds,
/// remove dangling links, remove redundant paths, remove dangling links
/// again, and finally build the contigs.
/// The graph is disposed before this method exits, whether it returns
/// normally or an assembly step throws.
/// </summary>
/// <param name="inputSequences">List of input sequences</param>
/// <returns>Assembled output containing the contigs that were built</returns>
public IDeNovoAssembly Assemble(IList<ISequence> inputSequences)
{
    ValidateAllSequences(inputSequences);

    // Remove ambiguous reads and set up fields for assembler process
    Initialize(inputSequences);

    try
    {
        // Step 1, 2: Create k-mers from reads and build de bruijn graph
        CreateGraph();

        // Estimate and set default value for erosion and coverage thresholds
        EstimateDefaultThresholds();

        // Step 3: Remove dangling links from graph
        UnDangleGraph();

        // Step 4: Remove redundant paths from graph
        RemoveRedundancy();

        // Perform dangling link purger step once more.
        // This is done to remove any links created by redundant paths purger.
        UnDangleGraph();

        // Step 5: Build Contigs
        IList<ISequence> contigSequences = BuildContigs();

        PaDeNAAssembly result = new PaDeNAAssembly();
        result.AddContigs(contigSequences);
        return result;
    }
    finally
    {
        // Release the graph even when a step above throws, so a failed
        // assembly does not leak it. The null guard covers a failure before
        // the field was assigned (presumably by CreateGraph - confirm).
        if (_graph != null)
        {
            _graph.Dispose();
        }
    }
}
/// <summary>
/// Generate sequences from list of contig nodes.
/// One sequence is built per scaffold path, followed by the node sequence
/// of every graph node for which IsMarked() returns false (presumably the
/// contigs not consumed by any scaffold path - confirm against
/// BuildSequenceFromPath).
/// The contig graph is disposed before this method exits, on success and
/// on failure alike; callers must not use it afterwards.
/// Both queries run as unordered parallel queries, so the order of the
/// returned sequences should not be relied upon.
/// </summary>
/// <param name="contigGraph">Contig Overlap Graph.</param>
/// <param name="paths">Scaffold paths.</param>
/// <returns>List of sequences of scaffolds.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="contigGraph"/> or <paramref name="paths"/> is null.
/// </exception>
protected IList<ISequence> GenerateScaffold(
    DeBruijnGraph contigGraph,
    IList<ScaffoldPath> paths)
{
    if (contigGraph == null)
    {
        throw new ArgumentNullException("contigGraph");
    }

    if (paths == null)
    {
        throw new ArgumentNullException("paths");
    }

    try
    {
        // Materialize the scaffold sequences first: the unmarked-node filter
        // below must only run once every path has been processed.
        List<ISequence> scaffolds = paths
            .AsParallel()
            .Select(path => path.BuildSequenceFromPath(contigGraph, _kmerLength))
            .ToList();

        // Append the sequences of nodes that are still unmarked, using a
        // single parallel pipeline instead of re-wrapping a parallel query.
        scaffolds.AddRange(
            contigGraph.Nodes
                .AsParallel()
                .Where(node => !node.IsMarked())
                .Select(node => contigGraph.GetNodeSequence(node)));

        return scaffolds;
    }
    finally
    {
        // This method consumes the graph; release it even if building a
        // sequence throws.
        contigGraph.Dispose();
    }
}