/// <summary>
        /// Deletes from the graph every node that lies on one of the supplied paths.
        /// Before deletion, each such node is detached from those of its neighbors
        /// that are not themselves being deleted.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn graph to prune. It is validated and stored as the current graph.</param>
        /// <param name="nodesList">Paths whose nodes are to be deleted.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="nodesList"/> is null.</exception>
        public void RemoveErroneousNodes(DeBruijnGraph deBruijnGraph, DeBruijnPathList nodesList)
        {
            DeBruijnGraph.ValidateGraph(deBruijnGraph);
            if (nodesList == null)
            {
                throw new ArgumentNullException("nodesList");
            }

            this.graph = deBruijnGraph;

            // Flatten all paths into a single set: a node shared by several paths is
            // processed once, and the membership test below is a hash lookup.
            var doomedNodes = new HashSet<DeBruijnNode>(
                nodesList.Paths.AsParallel().SelectMany(path => path.PathNodes));

            // Only the extension nodes of a doomed node are enumerated here; the
            // modification is applied to the surviving neighbor, through
            // RemoveExtensionThreadSafe, so the doomed node itself is not altered.
            Parallel.ForEach(
                doomedNodes,
                doomed =>
                {
                    foreach (DeBruijnNode neighbor in doomed.GetExtensionNodes())
                    {
                        // A neighbor that is going away as well needs no update.
                        if (doomedNodes.Contains(neighbor))
                        {
                            continue;
                        }

                        neighbor.RemoveExtensionThreadSafe(doomed);
                    }
                });

            // With the surviving neighbors updated, drop the nodes themselves.
            this.graph.RemoveNodes(doomedNodes);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Finds the nodes that sit on redundant paths of the graph.
        /// Every node with more than one right extension, or more than one left
        /// extension, is handed to TraceDivergingExtensionPaths together with those
        /// extensions. The collected path lists are then de-duplicated and passed to
        /// DetachBestPath, whose result is returned.
        /// Per the original author's note: the diverging paths are traced until they
        /// converge to a single node or a threshold length is exceeded, the best path
        /// is chosen by the kmer counts of its nodes, and all other paths are
        /// returned for removal.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn Graph.</param>
        /// <returns>List of path nodes to be deleted.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="deBruijnGraph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph deBruijnGraph)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            DeBruijnGraph.ValidateGraph(deBruijnGraph);
            this.graph = deBruijnGraph;

            // NOTE(review): this list is shared by all parallel iterations below;
            // TraceDivergingExtensionPaths is presumably responsible for guarding
            // its additions to it - confirm.
            var candidatePaths = new List<DeBruijnPathList>();

            Parallel.ForEach(
                deBruijnGraph.GetNodes(),
                branchNode =>
                {
                    // Ambiguity can occur on either side, so both are examined.
                    // The 'true' / 'false' argument distinguishes the right call from the left call.
                    if (branchNode.RightExtensionNodesCount > 1)
                    {
                        TraceDivergingExtensionPaths(
                            branchNode,
                            branchNode.GetRightExtensionNodesWithOrientation(),
                            true,
                            candidatePaths);
                    }

                    if (branchNode.LeftExtensionNodesCount > 1)
                    {
                        TraceDivergingExtensionPaths(
                            branchNode,
                            branchNode.GetLeftExtensionNodesWithOrientation(),
                            false,
                            candidatePaths);
                    }
                });

            List<DeBruijnPathList> uniquePaths = RemoveDuplicates(candidatePaths);
            return DetachBestPath(uniquePaths);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Builds contig sequences from the given graph.
 /// The graph is stored and validated, ambiguous extensions are excluded,
 /// invalid extensions are purged from every node in parallel, and the
 /// result of GetSimplePaths is returned.
 /// </summary>
 /// <param name="graph">De Bruijn graph</param>
 /// <returns>List of contig data</returns>
 public IList <ISequence> Build(DeBruijnGraph graph)
 {
     // -1 is presumably a sentinel meaning "no coverage threshold" - confirm
     // against GetSimplePaths.
     _coverageThreshold = -1;
     _graph = graph;

     DeBruijnGraph.ValidateGraph(_graph);
     ExcludeAmbiguousExtensions();

     _graph.Nodes.AsParallel().ForAll(node =>
     {
         node.PurgeInvalidExtensions();
     });

     IList<ISequence> contigs = GetSimplePaths();
     return contigs;
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Builds contig sequences from the given graph.
 /// The graph is stored and validated, ambiguous extensions are excluded,
 /// invalid extensions are purged from every node in parallel, and the
 /// result of GetSimplePaths(true) is returned.
 /// </summary>
 /// <param name="deBruijnGraph">De Bruijn graph.</param>
 /// <returns>List of contig data.</returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="deBruijnGraph"/> is null.</exception>
 public IEnumerable <ISequence> Build(DeBruijnGraph deBruijnGraph)
 {
     if (deBruijnGraph == null)
     {
         throw new ArgumentNullException("deBruijnGraph");
     }

     // NaN is presumably the "no coverage threshold" marker when building
     // contigs - confirm against GetSimplePaths.
     this._coverageThreshold = Double.NaN;
     this._graph = deBruijnGraph;

     DeBruijnGraph.ValidateGraph(deBruijnGraph);
     this.ExcludeAmbiguousExtensions();

     Parallel.ForEach(
         _graph.GetNodes(),
         node =>
         {
             node.PurgeInvalidExtensions();
         });

     IEnumerable<ISequence> contigs = this.GetSimplePaths(true);
     return contigs;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds contigs from the graph and removes the graph nodes that end up marked.
        /// Per the original author's note, the nodes removed are those belonging
        /// to contigs whose coverage is below the specified threshold.
        /// </summary>
        /// <param name="graph">DeBruijn Graph</param>
        /// <param name="coverageThreshold">Coverage Threshold for contigs; must be greater than zero.</param>
        /// <returns>Number of nodes removed</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="coverageThreshold"/> is zero or negative.</exception>
        public int RemoveLowCoverageContigs(DeBruijnGraph graph, double coverageThreshold)
        {
            // NOTE(review): Double.NaN is not rejected by this comparison,
            // because every ordered comparison with NaN is false.
            if (coverageThreshold <= 0)
            {
                throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
            }

            _graph = graph;
            _coverageThreshold = coverageThreshold;
            DeBruijnGraph.ValidateGraph(_graph);

            ExcludeAmbiguousExtensions();
            _graph.Nodes.AsParallel().ForAll(node =>
            {
                node.ComputeValidExtensions();
            });

            // The returned paths are not needed here; the call is made for its
            // effect on the nodes (presumably marking those of low coverage contigs).
            GetSimplePaths();

            _graph.Nodes.AsParallel().ForAll(node =>
            {
                node.UndoAmbiguousExtensions();
            });

            int removedCount = _graph.Nodes.RemoveWhere(node => node.IsMarked());
            return removedCount;
        }
        /// <summary>
        /// Removes low coverage nodes from the graph.
        /// Every node whose KmerCount is below CoverageCutOff is marked for deletion,
        /// then RemoveMarkedExtensions is called on every node, and finally the
        /// marked nodes are removed from the graph.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        public void RemoveLowCoverageNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);

            // The same two passes run either in parallel or sequentially. The
            // sequential form is used on Mac - presumably a workaround for a
            // platform-specific problem with the parallel loops; confirm.
            bool useParallelLoops =
                Bio.CrossPlatform.Environment.GetRunningPlatform() != Bio.CrossPlatform.Environment.Platform.Mac;

            if (useParallelLoops)
            {
                var markingOptions = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };

                // Pass 1: flag every node below the coverage cut-off.
                Parallel.ForEach(graph.GetNodes(), markingOptions, candidate =>
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                });

                // Pass 2: let every node drop its marked extensions.
                Parallel.ForEach(graph.GetNodes(), current =>
                {
                    current.RemoveMarkedExtensions();
                });
            }
            else
            {
                // Pass 1: flag every node below the coverage cut-off.
                foreach (var candidate in graph.GetNodes())
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                }

                // Pass 2: let every node drop its marked extensions.
                foreach (var current in graph.GetNodes())
                {
                    current.RemoveMarkedExtensions();
                }
            }

            // Marked extensions were dropped from every node in pass 2, so the
            // marked nodes can now be removed from the graph.
            graph.RemoveMarkedNodes();
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Builds contigs from the graph and removes the graph nodes that end up marked.
 /// Per the original author's note, the nodes removed are those belonging
 /// to contigs whose coverage is below the specified threshold.
 /// </summary>
 /// <param name="deBruijnGraph">DeBruijn Graph.</param>
 /// <param name="coverageThresholdForContigs">Coverage Threshold for contigs; must be a number greater than zero.</param>
 /// <returns>Number of nodes removed.</returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="deBruijnGraph"/> is null.</exception>
 /// <exception cref="ArgumentException">
 /// Thrown when <paramref name="coverageThresholdForContigs"/> is zero, negative or NaN.
 /// </exception>
 public long RemoveLowCoverageContigs(DeBruijnGraph deBruijnGraph, double coverageThresholdForContigs)
 {
     if (deBruijnGraph == null)
     {
         throw new ArgumentNullException("deBruijnGraph");
     }

     // Written as a negated "> 0" rather than "<= 0" so that Double.NaN is
     // rejected too: every ordered comparison with NaN is false, so NaN would
     // slip past a "<= 0" test and be stored as the threshold.
     if (!(coverageThresholdForContigs > 0))
     {
         throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
     }

     this._coverageThreshold = coverageThresholdForContigs;
     this._graph             = deBruijnGraph;
     DeBruijnGraph.ValidateGraph(deBruijnGraph);

     this.ExcludeAmbiguousExtensions();
     Parallel.ForEach(deBruijnGraph.GetNodes(), n => n.ComputeValidExtensions());

     // The returned paths are not needed here; the call is made for its
     // effect on the nodes (presumably marking those of low coverage contigs).
     this.GetSimplePaths(false);

     Parallel.ForEach(deBruijnGraph.GetNodes(), n => n.UndoAmbiguousExtensions());
     return(deBruijnGraph.RemoveMarkedNodes());
 }
 /// <summary>
 /// Removes every node that is still unvisited after visiting from the reference nodes.
 /// All nodes are first reset to "not visited"; visitAllConnectedNodes is then
 /// run from each reference node that has not been visited yet; finally every
 /// node left unvisited is marked for deletion, marked extensions are dropped
 /// from all nodes, and the marked nodes are removed from the graph.
 /// </summary>
 /// <param name="graph">De Bruijn Graph.</param>
 /// <param name="referenceNodes">Nodes from which the visit starts.</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="referenceNodes"/> is null.</exception>
 public void RemoveUnconnectedNodes(DeBruijnGraph graph, IEnumerable <DeBruijnNode> referenceNodes)
 {
     DeBruijnGraph.ValidateGraph(graph);

     // Fail before touching the graph: previously a null sequence surfaced as a
     // NullReferenceException only after the visit state had been reset.
     if (referenceNodes == null)
     {
         throw new ArgumentNullException("referenceNodes");
     }

     // Mark all nodes as not visited.
     graph.SetNodeVisitState(false);

     // Visit everything connected to a reference node. Per the original
     // author's note, this loop should spend basically all its time on the
     // first node; later reference nodes are usually already visited.
     // visitAllConnectedNodes is expected to flag the nodes it reaches as visited.
     foreach (DeBruijnNode node in referenceNodes)
     {
         if (!node.IsVisited)
         {
             visitAllConnectedNodes(node);
         }
     }

     // Mark any node that was never visited for deletion.
     Parallel.ForEach(graph.GetNodes(), new ParallelOptions()
     {
         MaxDegreeOfParallelism = Environment.ProcessorCount
     }, x => {
         if (!x.IsVisited)
         {
             x.MarkNodeForDelete();
         }
     });

     // Let every node drop its extensions to marked nodes.
     Parallel.ForEach(
         graph.GetNodes(),
         (node) =>
     {
         node.RemoveMarkedExtensions();
     });

     // Per the original author's note, the marked nodes are not connected to
     // anything being kept, so no further change to the graph structure is needed.
     graph.RemoveMarkedNodes();
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Finds the nodes that sit on redundant paths of the graph.
        /// Every node with more than one right extension, or more than one left
        /// extension, is handed to TraceDivergingExtensionPaths together with those
        /// extensions. The collected path lists are then de-duplicated and passed to
        /// DetachBestPath, whose result is returned.
        /// Per the original author's note: the diverging paths are traced until they
        /// converge to a single node or a threshold length is exceeded, the best path
        /// is chosen by the kmer counts of its nodes, and all other paths are
        /// returned for removal.
        /// </summary>
        /// <param name="graph">De Bruijn Graph</param>
        /// <returns>List of path nodes to be deleted</returns>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);
            _graph = graph;

            // NOTE(review): this list is shared by all parallel iterations below;
            // TraceDivergingExtensionPaths is presumably responsible for guarding
            // its additions to it - confirm.
            var candidatePaths = new List<DeBruijnPathList>();

            Parallel.ForEach(
                _graph.Nodes,
                branchNode =>
                {
                    // Ambiguity can occur on either side, so both are examined.
                    // The 'true' / 'false' argument distinguishes the right call from the left call.
                    if (branchNode.RightExtensionNodes.Count > 1)
                    {
                        TraceDivergingExtensionPaths(
                            branchNode,
                            branchNode.RightExtensionNodes,
                            true,
                            candidatePaths);
                    }

                    if (branchNode.LeftExtensionNodes.Count > 1)
                    {
                        TraceDivergingExtensionPaths(
                            branchNode,
                            branchNode.LeftExtensionNodes,
                            false,
                            candidatePaths);
                    }
                });

            List<DeBruijnPathList> uniquePaths = RemoveDuplicates(candidatePaths);
            return DetachBestPath(uniquePaths);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Removes pathological nodes from the graph. A node is treated as
        /// pathological when its k-mer equals the all-'A' or the all-'G' k-mer
        /// of the graph's k-mer length, or when it contains a self reference.
        /// Such nodes are marked for deletion, marked extensions are dropped from
        /// every node, and the marked nodes are removed from the graph.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        /// <returns>Number of nodes that were identified as pathological and marked for deletion.</returns>
        public static int RemovePathologicalNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);

            // Encoded form of the homopolymer k-mer made only of 'A'.
            var polyABytes = Enumerable.Repeat((byte)'A', graph.KmerLength).ToArray();
            var polyASequence = new Bio.Sequence(Bio.Alphabets.DNA, polyABytes, false);
            var polyAKmer = KmerData32.GetKmers(polyASequence, graph.KmerLength).First().KmerData;

            // Encoded form of the homopolymer k-mer made only of 'G'.
            var polyGBytes = Enumerable.Repeat((byte)'G', graph.KmerLength).ToArray();
            var polyGSequence = new Bio.Sequence(Bio.Alphabets.DNA, polyGBytes, false);
            var polyGKmer = KmerData32.GetKmers(polyGSequence, graph.KmerLength).First().KmerData;

            var markedCount = 0;

            // Pass 1: flag homopolymer and self-referencing nodes.
            foreach (var candidate in graph.GetNodes())
            {
                bool isPathological =
                    candidate.NodeValue.KmerData == polyAKmer ||
                    candidate.NodeValue.KmerData == polyGKmer ||
                    candidate.ContainsSelfReference;

                if (!isPathological)
                {
                    continue;
                }

                candidate.MarkNodeForDelete();
                Interlocked.Increment(ref markedCount);
            }

            // Pass 2: let every node drop its extensions to marked nodes.
            foreach (var current in graph.GetNodes())
            {
                current.RemoveMarkedExtensions();
            }

            // Marked extensions were dropped from every node in pass 2, so the
            // marked nodes can now be removed from the graph.
            graph.RemoveMarkedNodes();
            return markedCount;
        }