Ejemplo n.º 1
0
        /// <summary>
        /// Collects the simple paths of the graph. Nodes with extensions on exactly one side
        /// are traced in parallel; nodes left unvisited afterwards are traced sequentially.
        /// </summary>
        /// <param name="createContigSequences">
        /// Forwarded to every TraceSimplePath call. For a node with no extensions at all, the
        /// node's sequence is added to the result only when this is true and the coverage
        /// threshold is -1.
        /// </param>
        /// <returns>List of simple paths.</returns>
        private List <ISequence> GetSimplePaths(bool createContigSequences)
        {
            // Clear all visit flags first so nodes missed by the parallel pass can be found afterwards.
            _graph.SetNodeVisitState(false);
            var simplePaths = new List <ISequence>();

            Parallel.ForEach(_graph.GetNodes(), node =>
            {
                int leftCount  = node.LeftExtensionNodesCount;
                int rightCount = node.RightExtensionNodesCount;

                if (leftCount + rightCount != 0)
                {
                    // Only nodes with a single extension on exactly one side start a trace;
                    // the third argument differs depending on which side has the extension.
                    if (leftCount == 1 && rightCount == 0)
                    {
                        TraceSimplePath(simplePaths, node, false, createContigSequences, true);
                    }
                    else if (rightCount == 1 && leftCount == 0)
                    {
                        TraceSimplePath(simplePaths, node, true, createContigSequences, true);
                    }

                    return;
                }

                // Island: a node without any extensions.
                node.IsVisited = true;

                if (_coverageThreshold != -1)
                {
                    // A threshold is set: islands below it are marked for deletion, others are left alone.
                    if (node.KmerCount < _coverageThreshold)
                    {
                        node.MarkNodeForDelete();
                    }

                    return;
                }

                if (createContigSequences)
                {
                    // The result list is shared between worker threads, so guard the insertion.
                    lock (simplePaths)
                    {
                        simplePaths.Add(_graph.GetNodeSequence(node));
                    }
                }
            });

            // Every path that starts at an end node has been handled above. Nodes that sit entirely
            // inside a circular loop (small plasmids, etc.) have no end node and are still unvisited.
            // They are processed sequentially: it is fast enough, and it avoids grabbing nodes of the
            // same circle concurrently.
            foreach (var remaining in _graph.GetUnvisitedNodes())
            {
                TraceSimplePath(simplePaths, remaining, true, createContigSequences, false);
            }

            // Leave the flags cleared; likely redundant, since any user of the visit flag
            // should reset it on its own.
            _graph.SetNodeVisitState(false);
            return simplePaths;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Condenses redundant paths down to simple paths: every node that is still unvisited
 /// when the loop reaches it is wrapped in a new MetaNode, which is appended to MetaNodes.
 /// </summary>
 private void CreateMetaNodes()
 {
     // Begin with all visit flags cleared.
     _graph.SetNodeVisitState(false);

     // Condense linearly connected nodes into "MetaNodes".
     // Note: iterating in a loop avoids a stack overflow.
     // NOTE(review): presumably the MetaNode constructor marks the nodes it absorbs as
     // visited, which is why later iterations can skip them - confirm against MetaNode.
     foreach (DeBruijnNode candidate in _graph.GetNodes())
     {
         if (!candidate.IsVisited)
         {
             MetaNode condensed = new MetaNode(candidate, _graph);
             MetaNodes.Add(condensed);
         }
     }
 }
 /// <summary>
 /// Removes every node that is left unvisited after walking the graph from the reference nodes.
 /// Unvisited nodes are marked for deletion, marked extensions are removed from all nodes,
 /// and the marked nodes are then removed from the graph.
 /// </summary>
 /// <param name="graph">De Bruijn Graph.</param>
 /// <param name="referenceNodes">Nodes from which the connectivity walk starts.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="referenceNodes"/> is null.
 /// </exception>
 public void RemoveUnconnectedNodes(DeBruijnGraph graph, IEnumerable <DeBruijnNode> referenceNodes)
 {
     // Basic strategy: start at all reference nodes, find everything that is not
     // reachable from them, and remove it.
     DeBruijnGraph.ValidateGraph(graph);

     // Reject a null node list before any graph state is touched; previously this only
     // failed with a NullReferenceException after the visit flags had been reset.
     if (referenceNodes == null)
     {
         throw new ArgumentNullException("referenceNodes");
     }

     // Mark all nodes as not visited.
     graph.SetNodeVisitState(false);

     // Visit everything connected to the reference. This loop should spend basically all
     // of its time on the first node; later reference nodes are usually already visited.
     // NOTE(review): presumably visitAllConnectedNodes sets IsVisited on every node it
     // reaches - confirm against its definition.
     foreach (DeBruijnNode node in referenceNodes)
     {
         if (!node.IsVisited)
         {
             visitAllConnectedNodes(node);
         }
     }

     // Mark any node that was never visited for deletion.
     Parallel.ForEach(graph.GetNodes(), new ParallelOptions()
     {
         MaxDegreeOfParallelism = Environment.ProcessorCount
     }, x => {
         if (!x.IsVisited)
         {
             x.MarkNodeForDelete();
         }
     });

     // Separate pass, run only after all marking is finished.
     Parallel.ForEach(
         graph.GetNodes(),
         (node) =>
     {
         node.RemoveMarkedExtensions();
     });

     // Now delete them; since they are not connected to anything we are keeping,
     // there is no need to alter the graph structure.
     graph.RemoveMarkedNodes();
 }