/// <summary>
/// Purges from the graph every node that belongs to a dangling link.
/// </summary>
/// <param name="deBruijnGraph">Graph the nodes are removed from.</param>
/// <param name="nodesList">Paths whose nodes form the dangling links.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="deBruijnGraph"/> or <paramref name="nodesList"/> is null.
/// </exception>
public void RemoveErroneousNodes(DeBruijnGraph deBruijnGraph, DeBruijnPathList nodesList)
{
    // Reject missing arguments before touching anything.
    if (deBruijnGraph == null)
    {
        throw new ArgumentNullException("deBruijnGraph");
    }

    if (nodesList == null)
    {
        throw new ArgumentNullException("nodesList");
    }

    // Final node of each dangling path, collected once and shared with RemoveLinkNodes.
    var lastNodes = new HashSet<DeBruijnNode>(
        nodesList.Paths.Select(path => path.PathNodes.Last()));

    // Deferred parallel query: as it is enumerated, each path first has its
    // extensions updated through RemoveLinkNodes and then yields its nodes.
    var nodesToDelete = nodesList.Paths.AsParallel().SelectMany(path =>
    {
        RemoveLinkNodes(path, lastNodes);
        return path.PathNodes;
    });

    // The graph consumes the query and deletes the yielded nodes.
    deBruijnGraph.RemoveNodes(nodesToDelete);
}
/// <summary>
/// Returns the end node of a redundant path cluster.
/// Every path in the input belongs to the same cluster, so they all share
/// one start node and one end node; the last node of the first path is
/// therefore the end node of the whole cluster.
/// </summary>
/// <param name="paths">List of redundant paths.</param>
/// <returns>End node of the redundant path cluster.</returns>
private static DeBruijnNode GetEndNode(DeBruijnPathList paths)
{
    // Any path would do; the first one is used as the representative.
    var representative = paths.Paths.First();
    return representative.PathNodes.Last();
}
/// <summary>
/// Separates the best path from a cluster of redundant paths and returns
/// the remaining paths, i.e. the ones that should be removed.
/// The input list is modified in place: the best path is taken out of it,
/// and nodes shared with the best path are stripped from the other paths.
/// </summary>
/// <param name="divergingPaths">List of redundant paths.</param>
/// <returns>
/// The same list instance, now holding only the path nodes to be deleted.
/// </returns>
private static DeBruijnPathList ExtractBestPath(DeBruijnPathList divergingPaths)
{
    // Find the "best" path; everything except it is returned for removal.
    int bestPathIndex = GetBestPath(divergingPaths);
    DeBruijnPath bestPath = divergingPaths.Paths[bestPathIndex];
    divergingPaths.Paths.RemoveAt(bestPathIndex);

    // Redundant paths can overlap with the best path. Build the membership
    // set once so each lookup is O(1) instead of a linear scan of the best
    // path for every node of every other path.
    var bestPathNodes = new HashSet<DeBruijnNode>(bestPath.PathNodes);

    // Keep the best path intact by dropping its nodes from the removal candidates.
    foreach (var path in divergingPaths.Paths)
    {
        path.RemoveAll(n => bestPathNodes.Contains(n));
    }

    return divergingPaths;
}
/// <summary>
/// Picks the best path out of a list of diverging paths.
/// The best path is the one whose k-mers have the largest total 'count'.
/// When several paths tie for the largest total, the earliest one in the
/// list is returned.
/// </summary>
/// <param name="divergingPaths">List of diverging paths.</param>
/// <returns>
/// Index of the best path, or -1 when no path has a total greater than -1
/// (for example when the list is empty).
/// </returns>
private static int GetBestPath(DeBruijnPathList divergingPaths)
{
    long highestTotal = -1;
    int winner = -1;
    int position = 0;

    foreach (var candidate in divergingPaths.Paths)
    {
        // Total 'count' over all k-mers on this path.
        long total = candidate.PathNodes.Sum(n => n.KmerCount);

        // Strictly greater: an equal total never displaces an earlier path.
        if (total > highestTotal)
        {
            highestTotal = total;
            winner = position;
        }

        position++;
    }

    return winner;
}
/// <summary>
/// Removes nodes that are part of redundant paths.
/// The supplied graph is stored on this instance, neighbors of the deleted
/// nodes have their extensions updated, and the nodes are then removed
/// from the graph.
/// </summary>
/// <param name="deBruijnGraph">De Bruijn graph.</param>
/// <param name="nodesList">Path nodes to be deleted.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="deBruijnGraph"/> or <paramref name="nodesList"/> is null.
/// </exception>
public void RemoveErroneousNodes(DeBruijnGraph deBruijnGraph, DeBruijnPathList nodesList)
{
    // Validate the argument itself. The previous check looked at the
    // this.graph field, which rejected a valid graph whenever the field
    // had not been set yet, even though the field is reassigned below.
    if (deBruijnGraph == null)
    {
        throw new ArgumentNullException("deBruijnGraph");
    }

    // NOTE(review): ValidateGraph is defined elsewhere; presumably it throws
    // for an unusable graph - confirm against its implementation.
    DeBruijnGraph.ValidateGraph(deBruijnGraph);

    if (nodesList == null)
    {
        throw new ArgumentNullException("nodesList");
    }

    this.graph = deBruijnGraph;

    // Neighbors of all nodes have to be updated, so gather the distinct
    // set of nodes across every path first.
    HashSet<DeBruijnNode> deleteNodes = new HashSet<DeBruijnNode>(
        nodesList.Paths.AsParallel().SelectMany(nl => nl.PathNodes));

    // Update extensions for deletion.
    // No need for a read-write lock: the extensions of the node being
    // deleted are only read, and only other graph nodes are updated.
    Parallel.ForEach(
        deleteNodes,
        node =>
        {
            foreach (DeBruijnNode extension in node.GetExtensionNodes())
            {
                // A neighbor that is itself being deleted needs no update.
                if (!deleteNodes.Contains(extension))
                {
                    extension.RemoveExtensionThreadSafe(node);
                }
            }
        });

    // Delete nodes from graph.
    this.graph.RemoveNodes(deleteNodes);
}