/// <summary>
/// Collects the simple paths in the graph. Nodes are first examined in parallel:
/// islands (no extensions on either side) are handled in place, and nodes with
/// exactly one extension on one side and none on the other are handed to
/// <c>TraceSimplePath</c>. Any nodes still unvisited afterwards are traced sequentially.
/// </summary>
/// <param name="createContigSequences">
/// Forwarded to <c>TraceSimplePath</c>; it also decides whether the sequence of an
/// island node is added to the result when no coverage threshold is set.
/// </param>
/// <returns>List of simple paths.</returns>
private List<ISequence> GetSimplePaths(bool createContigSequences)
{
    // Clear the visit flags so that nodes missed by the parallel pass can be found afterwards.
    _graph.SetNodeVisitState(false);

    var paths = new List<ISequence>();

    Parallel.ForEach(_graph.GetNodes(), node =>
    {
        int leftCount = node.LeftExtensionNodesCount;
        int rightCount = node.RightExtensionNodesCount;

        if (leftCount + rightCount == 0)
        {
            // Island: no extensions on either side.
            node.IsVisited = true;

            if (_coverageThreshold != -1)
            {
                // A threshold is set: low-coverage islands are marked for deletion.
                if (node.KmerCount < _coverageThreshold)
                {
                    node.MarkNodeForDelete();
                }

                return;
            }

            // No threshold (-1): keep the island's sequence if sequences were requested.
            if (createContigSequences)
            {
                // The list is shared between the parallel workers, so guard the insertion.
                lock (paths)
                {
                    paths.Add(_graph.GetNodeSequence(node));
                }
            }

            return;
        }

        if (leftCount == 1 && rightCount == 0)
        {
            // Path end with a single left extension.
            TraceSimplePath(paths, node, false, createContigSequences, true);
        }
        else if (rightCount == 1 && leftCount == 0)
        {
            // Path end with a single right extension.
            TraceSimplePath(paths, node, true, createContigSequences, true);
        }
    });

    // Every path that starts at an end has now been found. Nodes entirely enclosed in a
    // circular loop were skipped; these are small (plasmids, etc.), so tracing them
    // without parallelism is fast enough. It must also be sequential to avoid grabbing
    // nodes from the same circle concurrently.
    foreach (var loopNode in _graph.GetUnvisitedNodes())
    {
        TraceSimplePath(paths, loopNode, true, createContigSequences, false);
    }

    // Reset the visit flags. Likely unnecessary, since any method that relies on the
    // visit state should clear it on its own.
    _graph.SetNodeVisitState(false);

    return paths;
}
/// <summary>
/// Condenses redundant paths down to simple paths by building a <c>MetaNode</c>
/// for every graph node that is still unvisited, and storing it in <c>MetaNodes</c>.
/// </summary>
private void CreateMetaNodes()
{
    // Start with every node flagged as unvisited.
    _graph.SetNodeVisitState(false);

    // Condense all linearly connected nodes into "MetaNodes".
    // Note: the loop (rather than recursion) avoids a stack overflow.
    // NOTE(review): presumably the MetaNode constructor marks the nodes it absorbs
    // as visited, which is what makes the IsVisited check below skip them - confirm.
    foreach (DeBruijnNode node in _graph.GetNodes())
    {
        if (!node.IsVisited)
        {
            MetaNodes.Add(new MetaNode(node, _graph));
        }
    }
}
/// <summary>
/// Removes every node that is not connected to the reference nodes. Starting from
/// each reference node, all connected nodes are visited; whatever remains unvisited
/// is marked for deletion, dropped from the extension tables of the other nodes,
/// and finally removed from the graph.
/// </summary>
/// <param name="graph">De Bruijn Graph.</param>
/// <param name="referenceNodes">Nodes from which the traversal of connected nodes starts.</param>
public void RemoveUnconnectedNodes(DeBruijnGraph graph, IEnumerable<DeBruijnNode> referenceNodes)
{
    DeBruijnGraph.ValidateGraph(graph);

    // Start with every node flagged as unvisited.
    graph.SetNodeVisitState(false);

    // Visit everything that is connected to the reference somehow.
    // This loop should spend basically all its time on the first node.
    foreach (DeBruijnNode referenceNode in referenceNodes)
    {
        if (!referenceNode.IsVisited)
        {
            visitAllConnectedNodes(referenceNode);
        }
    }

    // Whatever was never visited is marked for deletion.
    var markingOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = Environment.ProcessorCount
    };
    Parallel.ForEach(graph.GetNodes(), markingOptions, candidate =>
    {
        if (candidate.IsVisited)
        {
            return;
        }

        candidate.MarkNodeForDelete();
    });

    // Let every node drop its extensions that were marked above.
    Parallel.ForEach(graph.GetNodes(), current => current.RemoveMarkedExtensions());

    // The marked nodes are not connected to anything being kept, so they can be
    // deleted without altering the graph structure.
    graph.RemoveMarkedNodes();
}