Exemple #1
0
        /// <summary>
        /// Decides whether <paramref name="node"/> may be appended to the dangling
        /// link <paramref name="link"/> that is currently being traced.
        /// Possible outcomes:
        /// - Erosion is enabled (_erodeThreshold is not -1), the link is still empty and the
        ///   node's KmerCount is below the threshold: the link is returned unchanged. The flag
        ///   is true if the node was already marked; otherwise the node is marked and the flag is false.
        /// - The node is already on the link (a loop): the link is returned unchanged, flag is true.
        /// - The link already holds _lengthThreshold or more nodes: null is returned, flag is true.
        /// - Otherwise the node is appended to the link, the flag is false and the link is returned.
        /// </summary>
        /// <param name="link">Dangling link traced so far</param>
        /// <param name="node">Candidate node to append</param>
        /// <param name="reachedErrorEndPoint">Set to true when tracing of this link must stop</param>
        /// <returns>The (possibly extended) link, or null when the length threshold was reached</returns>
        private DeBruijnPath CheckAndAddDanglingNode(DeBruijnPath link, DeBruijnNode node, out bool reachedErrorEndPoint)
        {
            bool isErosionCandidate = _erodeThreshold != -1
                                      && link.PathNodes.Count == 0
                                      && node.KmerCount < _erodeThreshold;

            if (isErosionCandidate)
            {
                // A node that is already marked ends the trace (the link is left as is).
                // A fresh node is marked now and tracing goes on; the link is not updated either way.
                reachedErrorEndPoint = node.IsMarked();
                if (!reachedErrorEndPoint)
                {
                    node.MarkNode();
                }

                return link;
            }

            // Seeing the same node twice means the link loops back on itself: stop, keep the link.
            if (link.PathNodes.Contains(node))
            {
                reachedErrorEndPoint = true;
                return link;
            }

            // Too long to be a dangling link: stop and signal it with a null result.
            if (link.PathNodes.Count >= _lengthThreshold)
            {
                reachedErrorEndPoint = true;
                return null;
            }

            // Nothing objects to this node, so it becomes part of the link.
            reachedErrorEndPoint = false;
            link.PathNodes.Add(node);
            return link;
        }
Exemple #2
0
        /// <summary>
        /// Erodes ends of the graph whose coverage is below <paramref name="erodeThreshold"/> and,
        /// in the same scan, records the lengths of the dangling links that are detected.
        /// Dangling links themselves are not removed here; only their lengths are collected so
        /// callers can decide at which lengths to run the dangling links purger step.
        /// The work is done in passes: the current batch of nodes is scanned in parallel, then
        /// RemoveErodedNodes is called and its out parameter supplies the batch for the next pass.
        /// Passes repeat until that batch is null or empty.
        /// Note: this method is not read-only. Parallel workers call MarkNode on nodes selected
        /// for erosion, and RemoveErodedNodes is invoked on the graph after every pass.
        /// NOTE(review): thread-safety of MarkNode / TraceDanglingExtensionLink is not visible here - confirm.
        /// </summary>
        /// <param name="graph">Input graph</param>
        /// <param name="erodeThreshold">
        /// Threshold for erosion. With the default of -1 no node is marked for erosion by this method.
        /// </param>
        /// <returns>The set of dangling link lengths detected (held in _danglingLinkLengths)</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> is null</exception>
        public IEnumerable <int> ErodeGraphEnds(DeBruijnGraph graph, int erodeThreshold = -1)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            _erodeThreshold             = erodeThreshold;
            _danglingLinkLengths        = new SortedSet <int>();
            _danglingLinkExtensionTasks = new List <Task <int> >();
            ICollection <DeBruijnNode> graphNodes = graph.Nodes;

            try
            {
                do
                {
                    // Make graphNodes into an array so that range partitioning can be used.
                    DeBruijnNode[] graphNodesList = graphNodes.ToArray();
                    int            batchSize      = graphNodesList.Length;

                    if (batchSize != 0)
                    {
                        // Size the ranges from the batch that is actually partitioned (not from the
                        // whole graph) so that every pass is spread over the available processors.
                        // Integer ceiling division keeps the result exact for any array length.
                        int processorCount = Environment.ProcessorCount;
                        int rangeSize      = (batchSize / processorCount) + (batchSize % processorCount == 0 ? 0 : 1);

                        _danglingLinkLengths.UnionWith(
                            Partitioner.Create(0, batchSize, rangeSize).AsParallel().SelectMany(chunk =>
                        {
                            // Each worker collects into its own set; results are merged by UnionWith.
                            SortedSet <int> linkLengths = new SortedSet <int>();
                            for (int i = chunk.Item1; i < chunk.Item2; i++)
                            {
                                DeBruijnNode node = graphNodesList[i];
                                if (node.ExtensionsCount == 0)
                                {
                                    if (_erodeThreshold != -1 && node.KmerCount < _erodeThreshold)
                                    {
                                        // Mark node for erosion
                                        node.MarkNode();
                                    }
                                    else
                                    {
                                        // Single node island
                                        linkLengths.Add(1);
                                    }
                                }
                                else if (node.RightExtensionNodes.Count == 0)
                                {
                                    // End of possible dangling link
                                    // Traceback to see if it is part of a dangling link
                                    DeBruijnPath link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                                    if (link != null && link.PathNodes.Count > 0)
                                    {
                                        linkLengths.Add(link.PathNodes.Count);
                                    }
                                }
                                else if (node.LeftExtensionNodes.Count == 0)
                                {
                                    // End of possible dangling link
                                    // Traceback to see if it is part of a dangling link
                                    DeBruijnPath link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                                    if (link != null && link.PathNodes.Count > 0)
                                    {
                                        linkLengths.Add(link.PathNodes.Count);
                                    }
                                }
                            }
                            return linkLengths;
                        }));

                        // Remove eroded nodes. The out parameter receives the list of new
                        // end-points that were created by removing eroded nodes.
                        RemoveErodedNodes(graph, out graphNodes);
                    }
                } while (graphNodes != null && graphNodes.Count > 0);
            }
            finally
            {
                // Always leave erosion mode, even when a pass throws, so that later
                // calls on this instance do not see a stale threshold.
                _erodeThreshold = -1;
            }

            ExtendDanglingLinks();
            return _danglingLinkLengths;
        }