Ejemplo n.º 1
0
        /// <summary>
        /// Build contigs from graph. For contigs whose coverage is less than 
        /// the specified threshold, remove graph nodes belonging to them.
        /// </summary>
        /// <remarks>
        /// The supplied graph and threshold are stored in instance fields before the
        /// graph is validated, so they stay set even if validation throws.
        /// Ambiguous extensions are excluded while the simple paths are traversed and
        /// are restored on every node before the marked nodes are removed.
        /// </remarks>
        /// <param name="deBruijnGraph">DeBruijn Graph.</param>
        /// <param name="coverageThresholdForContigs">Coverage Threshold for contigs. Must be a positive number.</param>
        /// <returns>Number of nodes removed, as reported by the graph's RemoveMarkedNodes call.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="deBruijnGraph"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="coverageThresholdForContigs"/> is zero, negative or NaN.
        /// </exception>
        public long RemoveLowCoverageContigs(DeBruijnGraph deBruijnGraph, double coverageThresholdForContigs)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            // NaN compares false against everything, so "<= 0" alone would let it
            // through even though it is not a positive number.
            if (double.IsNaN(coverageThresholdForContigs) || coverageThresholdForContigs <= 0)
            {
                throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
            }

            _coverageThreshold = coverageThresholdForContigs;
            _graph = deBruijnGraph;
            DeBruijnGraph.ValidateGraph(deBruijnGraph);

            // The order of the following steps matters: ambiguous extensions are
            // excluded first, valid extensions are then computed per node, and only
            // after the path traversal are the ambiguous extensions restored.
            ExcludeAmbiguousExtensions();
            Parallel.ForEach(deBruijnGraph.GetNodes(), n => n.ComputeValidExtensions());

            // NOTE(review): presumably this traversal marks the nodes of contigs whose
            // coverage is below _coverageThreshold - confirm against GetSimplePaths.
            GetSimplePaths(false);

            Parallel.ForEach(deBruijnGraph.GetNodes(), n => n.UndoAmbiguousExtensions());
            return deBruijnGraph.RemoveMarkedNodes();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Removes the nodes currently marked in the graph and, if any were removed,
        /// clears the marked extensions on every remaining node.
        /// Deleting nodes can create new end-points, which need to be checked for
        /// dangling links. Those nodes are collected and returned to the caller.
        /// 
        /// TODO: Perhaps refactor code so that the graph is only manipulated by itself?
        /// Might make it easier to implement future performance improvements, or cost performance
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        /// <returns>
        /// Nodes that had no left or no right extension either before or after their
        /// marked extensions were removed; an empty list when no node was removed.
        /// </returns>
        private static IList<DeBruijnNode> RemoveErodedNodes(DeBruijnGraph graph)
        {
            // Nothing was deleted, so no extension table can have changed.
            if (!(graph.RemoveMarkedNodes() > 0))
            {
                return new List<DeBruijnNode>();
            }

            // The predicate deliberately has a side effect: every remaining node gets
            // its marked extensions removed, whether or not it ends up in the result.
            return graph.GetNodes().AsParallel().Where(node =>
            {
                bool endPointBefore = node.LeftExtensionNodesCount == 0
                    || node.RightExtensionNodesCount == 0;

                node.RemoveMarkedExtensions();

                if (endPointBefore)
                {
                    return true;
                }

                // Not an end point earlier; check whether the removal made it one.
                return node.LeftExtensionNodesCount == 0
                    || node.RightExtensionNodesCount == 0;
            }).ToList();
        }