/// <summary>
        /// Removes low coverage nodes from the graph. Every node whose <c>KmerCount</c> is
        /// below <c>CoverageCutOff</c> is marked for deletion, then
        /// <c>RemoveMarkedExtensions</c> is called on every node so that adjacent nodes
        /// update their extension tables, and finally <c>RemoveMarkedNodes</c> is called
        /// on the graph.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        public void RemoveLowCoverageNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);

            bool runningOnMac =
                Bio.CrossPlatform.Environment.GetRunningPlatform() == Bio.CrossPlatform.Environment.Platform.Mac;

            if (runningOnMac)
            {
                // On Mac both passes run sequentially on the calling thread.

                // Pass 1: mark every node that falls below the coverage cut-off.
                foreach (var candidate in graph.GetNodes())
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                }

                // Pass 2: let every node process its marked extensions.
                foreach (var current in graph.GetNodes())
                {
                    current.RemoveMarkedExtensions();
                }
            }
            else
            {
                var markingOptions = new ParallelOptions();
                markingOptions.MaxDegreeOfParallelism = Environment.ProcessorCount;

                // Pass 1: mark every node that falls below the coverage cut-off.
                Parallel.ForEach(graph.GetNodes(), markingOptions, candidate =>
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                });

                // Pass 2: let every node process its marked extensions. This pass only
                // starts after pass 1 has completed, so all marks are in place.
                Parallel.ForEach(graph.GetNodes(), current =>
                {
                    current.RemoveMarkedExtensions();
                });
            }

            // Finally drop the marked nodes themselves from the graph.
            graph.RemoveMarkedNodes();
        }
Пример #2
0
 /// <summary>
 /// Build contigs from graph. For contigs whose coverage is less than
 /// the specified threshold, remove graph nodes belonging to them.
 /// </summary>
 /// <param name="deBruijnGraph">DeBruijn Graph.</param>
 /// <param name="coverageThresholdForContigs">Coverage Threshold for contigs. Must be a positive number.</param>
 /// <returns>Number of nodes removed.</returns>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="deBruijnGraph"/> is null.
 /// </exception>
 /// <exception cref="ArgumentException">
 /// Thrown when <paramref name="coverageThresholdForContigs"/> is zero, negative or NaN.
 /// </exception>
 public long RemoveLowCoverageContigs(DeBruijnGraph deBruijnGraph, double coverageThresholdForContigs)
 {
     if (deBruijnGraph == null)
     {
         throw new ArgumentNullException("deBruijnGraph");
     }

     // Deliberately written as a negated "> 0" test: every comparison against
     // double.NaN is false, so a plain "<= 0" check would let NaN slip through.
     if (!(coverageThresholdForContigs > 0))
     {
         throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
     }

     // The threshold and graph are stored on the instance before the helper
     // calls below, which take no graph or threshold arguments.
     this._coverageThreshold = coverageThresholdForContigs;
     this._graph             = deBruijnGraph;

     DeBruijnGraph.ValidateGraph(deBruijnGraph);

     // Ambiguous extensions are excluded for the duration of the path search
     // and undone again afterwards.
     this.ExcludeAmbiguousExtensions();
     Parallel.ForEach(deBruijnGraph.GetNodes(), node => node.ComputeValidExtensions());

     this.GetSimplePaths(false);

     Parallel.ForEach(deBruijnGraph.GetNodes(), node => node.UndoAmbiguousExtensions());

     return deBruijnGraph.RemoveMarkedNodes();
 }
 /// <summary>
 /// Removes every node that was not visited by a traversal started from the given
 /// reference nodes. All nodes are first flagged as unvisited, then
 /// <c>visitAllConnectedNodes</c> is run from each reference node that is still
 /// unvisited. Nodes left unvisited are marked for deletion, every node processes
 /// its marked extensions, and the marked nodes are removed from the graph.
 /// </summary>
 /// <param name="graph">De Bruijn Graph.</param>
 /// <param name="referenceNodes">Nodes from which the connectivity traversal is started.</param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="referenceNodes"/> is null.
 /// </exception>
 public void RemoveUnconnectedNodes(DeBruijnGraph graph, IEnumerable<DeBruijnNode> referenceNodes)
 {
     DeBruijnGraph.ValidateGraph(graph);

     // Reject a missing reference set before touching the graph; otherwise the
     // visit state would already be reset when the enumeration below fails.
     if (referenceNodes == null)
     {
         throw new ArgumentNullException("referenceNodes");
     }

     // Mark all nodes as not visited.
     graph.SetNodeVisitState(false);

     // Visit everything connected to the reference. A reference node that an
     // earlier traversal already visited is skipped, so this loop should spend
     // basically all its time on the first node.
     foreach (DeBruijnNode node in referenceNodes)
     {
         if (!node.IsVisited)
         {
             visitAllConnectedNodes(node);
         }
     }

     // Mark any node the traversal never visited for deletion.
     Parallel.ForEach(graph.GetNodes(), new ParallelOptions()
     {
         MaxDegreeOfParallelism = Environment.ProcessorCount
     }, x => {
         if (!x.IsVisited)
         {
             x.MarkNodeForDelete();
         }
     });

     // Let every node process its marked extensions.
     Parallel.ForEach(
         graph.GetNodes(),
         (node) =>
     {
         node.RemoveMarkedExtensions();
     });

     // Now delete the marked nodes; they are not connected to anything we are
     // keeping, so no further change to the graph structure is needed.
     graph.RemoveMarkedNodes();
 }
Пример #4
0
        /// <summary>
        /// Delete nodes marked for erosion. Update adjacent nodes to update their extension tables.
        /// After nodes are deleted, some new end-points might be created. We need to check for
        /// dangling links at these new points, so the list of end-point nodes is returned.
        ///
        /// TODO: Perhaps refactor code so that the graph is only manipulated by itself?
        /// Might make it easier to implement future performance improvements, or cost performance
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        /// <returns>
        /// The nodes that had no left or no right extension either before or after
        /// <c>RemoveMarkedExtensions</c> was called on them. An empty list when
        /// <c>RemoveMarkedNodes</c> reported that nothing was removed.
        /// </returns>
        private static IList<DeBruijnNode> RemoveErodedNodes(DeBruijnGraph graph)
        {
            // Nothing was eroded: no extension tables need updating and there are
            // no new end points to report.
            if (graph.RemoveMarkedNodes() <= 0)
            {
                return new List<DeBruijnNode>();
            }

            return graph.GetNodes()
                .AsParallel()
                .Where(node =>
                {
                    // Record the end-point status before the extension tables change.
                    bool endPointBefore = node.LeftExtensionNodesCount == 0
                                          || node.RightExtensionNodesCount == 0;

                    node.RemoveMarkedExtensions();

                    if (endPointBefore)
                    {
                        return true;
                    }

                    // Check if this is a new end point.
                    return node.LeftExtensionNodesCount == 0
                           || node.RightExtensionNodesCount == 0;
                })
                .ToList();
        }
Пример #5
0
        /// <summary>
        /// Removes pathological nodes from the graph: nodes whose k-mer equals the first
        /// k-mer of an all-'A' or an all-'G' sequence of length <c>graph.KmerLength</c>,
        /// and nodes whose <c>ContainsSelfReference</c> is true. Matching nodes are marked
        /// for deletion, every node then processes its marked extensions, and finally the
        /// marked nodes are removed from the graph.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        /// <returns>Number of nodes that were marked for deletion.</returns>
        public static int RemovePathologicalNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);

            // Build the encoded k-mer for each homopolymer ('A' first, then 'G') once,
            // using the same construction for both instead of repeating it.
            var homopolymerKmers = new[] { (byte)'A', (byte)'G' }
                .Select(symbol =>
                {
                    var bases    = Enumerable.Repeat(symbol, graph.KmerLength).ToArray();
                    var sequence = new Bio.Sequence(Bio.Alphabets.DNA, bases, false);
                    return KmerData32.GetKmers(sequence, graph.KmerLength).First().KmerData;
                })
                .ToArray();
            var polyAKmer = homopolymerKmers[0];
            var polyGKmer = homopolymerKmers[1];

            // This loop is sequential, so a plain increment is sufficient for the counter.
            var badNodeCount = 0;
            foreach (var candidate in graph.GetNodes())
            {
                if (candidate.NodeValue.KmerData == polyAKmer ||
                    candidate.NodeValue.KmerData == polyGKmer ||
                    candidate.ContainsSelfReference)
                {
                    candidate.MarkNodeForDelete();
                    badNodeCount++;
                }
            }

            // Let every node process its marked extensions.
            foreach (var node in graph.GetNodes())
            {
                node.RemoveMarkedExtensions();
            }

            // Finally delete the marked nodes themselves.
            graph.RemoveMarkedNodes();
            return badNodeCount;
        }