/// <summary>
/// Tries to append 'node' to the dangling link 'link'.
/// Outcomes:
///  - Erosion is enabled (threshold is not -1), the link is still empty and the
///    node's k-mer count is below the erode threshold: the node is not added.
///    If it was already marked the end point is reported as reached; otherwise
///    the node is marked and tracing may continue. The link is returned unchanged.
///  - The node is already part of the link (loop): the link is returned unchanged
///    and the end point is reported as reached.
///  - The link already holds at least the length threshold number of nodes:
///    null is returned and the end point is reported as reached.
///  - Otherwise the node is appended and the link is returned.
/// </summary>
/// <param name="link">Dangling link</param>
/// <param name="node">Node to be added</param>
/// <param name="reachedErrorEndPoint">Set to true when tracing of this link should stop</param>
/// <returns>The (possibly updated) dangling link, or null when the length threshold is hit</returns>
private DeBruijnPath CheckAndAddDanglingNode(DeBruijnPath link, DeBruijnNode node, out bool reachedErrorEndPoint)
{
    if (_erodeThreshold != -1 && link.PathNodes.Count == 0 && node.KmerCount < _erodeThreshold)
    {
        // Low-coverage node at the very start of a link: it is never added to the link.
        // An already-marked node ends the trace; an unmarked one gets marked now.
        reachedErrorEndPoint = node.IsMarked();
        if (!reachedErrorEndPoint)
        {
            node.MarkNode();
        }

        return link;
    }

    // A repeated node means the link loops back on itself.
    bool loopFound = link.PathNodes.Contains(node);
    if (loopFound || link.PathNodes.Count >= _lengthThreshold)
    {
        // Loop: hand back the link untouched.
        // Too long: not a dangling link, so hand back null.
        reachedErrorEndPoint = true;
        return loopFound ? link : null;
    }

    // No stop condition applies: extend the link with this node.
    link.PathNodes.Add(node);
    reachedErrorEndPoint = false;
    return link;
}
/// <summary>
/// Erode ends of graph that have coverage less than given erodeThreshold.
/// As an optimization, dangling links are also detected and their lengths
/// recorded. No dangling link is removed at this step; the lengths give an
/// idea of the different lengths at which to run the dangling links purger step.
/// The scan is repeated on the end-points returned by RemoveErodedNodes until
/// no further end-points are returned.
/// NOTE(review): an earlier version of this comment stated that the method only
/// performs reads and needs no locking. The parallel scan calls MarkNode() on
/// nodes and RemoveErodedNodes is invoked between passes - confirm that these
/// are safe for the way this method is used.
/// </summary>
/// <param name="graph">Input graph</param>
/// <param name="erodeThreshold">Threshold for erosion. With -1 (the default) no node is marked for erosion.</param>
/// <returns>Lengths of dangling links detected</returns>
/// <exception cref="ArgumentNullException">Thrown when graph is null</exception>
public IEnumerable<int> ErodeGraphEnds(DeBruijnGraph graph, int erodeThreshold = -1)
{
    if (graph == null)
    {
        throw new ArgumentNullException("graph");
    }

    _erodeThreshold = erodeThreshold;
    _danglingLinkLengths = new SortedSet<int>();
    _danglingLinkExtensionTasks = new List<Task<int>>();

    ICollection<DeBruijnNode> graphNodes = graph.Nodes;
    do
    {
        // Snapshot the current work list into an array so that range partitioning can be used.
        DeBruijnNode[] graphNodesList = graphNodes.ToArray();

        if (graphNodesList.Length != 0)
        {
            // Size the ranges from the current work list (not the whole graph) so that
            // every pass is spread over the available processors. Integer ceiling
            // division; the result is at least 1 because the list is non-empty.
            int processorCount = Environment.ProcessorCount;
            int rangeSize = (graphNodesList.Length / processorCount)
                + (graphNodesList.Length % processorCount == 0 ? 0 : 1);

            _danglingLinkLengths.UnionWith(
                Partitioner.Create(0, graphNodesList.Length, rangeSize).AsParallel().SelectMany(chunk =>
                {
                    SortedSet<int> linkLengths = new SortedSet<int>();
                    for (int i = chunk.Item1; i < chunk.Item2; i++)
                    {
                        DeBruijnNode node = graphNodesList[i];
                        if (node.ExtensionsCount == 0)
                        {
                            if (_erodeThreshold != -1 && node.KmerCount < _erodeThreshold)
                            {
                                // Mark node for erosion
                                node.MarkNode();
                            }
                            else
                            {
                                // Single node island
                                linkLengths.Add(1);
                            }
                        }
                        else if (node.RightExtensionNodes.Count == 0)
                        {
                            // End of possible dangling link.
                            // Trace back to see if it is part of a dangling link.
                            DeBruijnPath link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                            if (link != null && link.PathNodes.Count > 0)
                            {
                                linkLengths.Add(link.PathNodes.Count);
                            }
                        }
                        else if (node.LeftExtensionNodes.Count == 0)
                        {
                            // End of possible dangling link.
                            // Trace back to see if it is part of a dangling link.
                            DeBruijnPath link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                            if (link != null && link.PathNodes.Count > 0)
                            {
                                linkLengths.Add(link.PathNodes.Count);
                            }
                        }
                    }

                    return linkLengths;
                }));

            // Remove eroded nodes. The out parameter receives the list of new
            // end-points that were created by removing eroded nodes; these form
            // the work list of the next pass.
            RemoveErodedNodes(graph, out graphNodes);
        }
    }
    while (graphNodes != null && graphNodes.Count > 0);

    // Erosion is finished: reset the threshold before extending dangling links.
    _erodeThreshold = -1;
    ExtendDanglingLinks();
    return _danglingLinkLengths;
}