Ejemplo n.º 1
0
        /// <summary>
        /// Flags every node that is marked for delete as deleted and, when at least one
        /// node was flagged, asks each graph node to remove its marked extensions.
        /// Nodes that had no left or no right extensions before that update, or have
        /// none after it, are collected so the caller can re-check them.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        /// <returns>
        /// The collected end-point nodes; an empty list when no node was flagged.
        /// </returns>
        private static IList<DeBruijnNode> RemoveErodedNodes(DeBruijnGraph graph)
        {
            bool anyNodeFlagged = false;

            Parallel.ForEach(graph.GetNodes(), candidate =>
            {
                if (!candidate.IsMarkedForDelete)
                {
                    return;
                }

                candidate.IsDeleted = true;
                anyNodeFlagged = true;
            });

            // Nothing was eroded, so no extension table needs updating.
            if (!anyNodeFlagged)
            {
                return new List<DeBruijnNode>();
            }

            return graph.GetNodes()
                .AsParallel()
                .Where(current =>
                {
                    bool endPointBefore = current.LeftExtensionNodesCount == 0
                                          || current.RightExtensionNodesCount == 0;

                    current.RemoveMarkedExtensions();

                    // Keep nodes that already were, or have just become, end points.
                    return endPointBefore
                           || current.LeftExtensionNodesCount == 0
                           || current.RightExtensionNodesCount == 0;
                })
                .ToList();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the graph for the small read set with a k-mer length of 6 and checks
        /// the node count, that every node sequence (or its reverse complement) is an
        /// expected one, and the summed extension count.
        /// </summary>
        public void TestDeBruijnGraphBuilderSmall()
        {
            const int smallKmerLength = 6;
            List<ISequence> smallReads = TestInputs.GetSmallReads();

            this.KmerLength = smallKmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(smallReads);

            this.CreateGraph();
            DeBruijnGraph builtGraph = this.Graph;

            Assert.AreEqual(20, builtGraph.NodeCount);
            HashSet<string> expectedNodes = GetGraphNodesForSmallReads();

            foreach (DeBruijnNode current in builtGraph.GetNodes())
            {
                // A node may be stored in either orientation, so accept both strands.
                string forward = new string(
                    builtGraph.GetNodeSequence(current).Select(symbol => (char)symbol).ToArray());
                string reverse = new string(
                    builtGraph.GetNodeSequence(current)
                              .GetReverseComplementedSequence()
                              .Select(symbol => (char)symbol)
                              .ToArray());

                Assert.IsTrue(expectedNodes.Contains(forward) || expectedNodes.Contains(reverse));
            }

            long totalEdges = builtGraph.GetNodes().Sum(n => n.ExtensionsCount);

            Assert.AreEqual(51, totalEdges);
        }
        /// <summary>
        /// Traces diverging extension paths from every node without a self reference that
        /// has more than one right or left extension, then keeps only the path groups whose
        /// member paths do not all contain the same number of nodes.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn graph to scan.</param>
        /// <returns>The retained path groups after duplicate and embedded-path removal.</returns>
        private List<DeBruijnPathList> GetIndelPaths(DeBruijnGraph deBruijnGraph)
        {
            List<DeBruijnPathList> divergingGroups = new List<DeBruijnPathList>();

            Parallel.ForEach(deBruijnGraph.GetNodes(), current =>
            {
                if (current.ContainsSelfReference)
                {
                    return;
                }

                // Ambiguity has to be checked on the right and on the left side.
                if (current.RightExtensionNodesCount > 1)
                {
                    TraceDivergingExtensionPaths(
                        current, current.GetRightExtensionNodesWithOrientation(), true, divergingGroups);
                }

                if (current.LeftExtensionNodesCount > 1)
                {
                    TraceDivergingExtensionPaths(
                        current, current.GetLeftExtensionNodesWithOrientation(), false, divergingGroups);
                }
            });

            RedundantPathsPurger.ValidatePathsAreFromSameDirection(divergingGroups, deBruijnGraph.KmerLength);

            // A group qualifies unless all of its paths share one single node count.
            var indelPaths = divergingGroups
                .Where(group => group.Paths
                                     .Select(path => path.PathNodes.Count)
                                     .Distinct()
                                     .Count() != 1)
                .ToList();

            //TODO: Could merge the two filters here
            var withoutDuplicates = RemoveDuplicates(indelPaths);
            return RemoveEmbeddedPaths(withoutDuplicates);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Assembles the scaffold read set (k = 6, dangling threshold 3, redundant path
        /// threshold 7), builds contigs, runs the scaffold builder and checks the number
        /// of scaffolds and the first scaffold sequence.
        /// </summary>
        public void BuildScaffold()
        {
            const int        kmerLength         = 6;
            const int        dangleThreshold    = 3;
            const int        redundantThreshold = 7;
            List <ISequence> sequences          = TestInputs.GetReadsForScaffolds();

            KmerLength = kmerLength;
            SequenceReads.Clear();
            this.AddSequenceReads(sequences);
            CreateGraph();
            DanglingLinksThreshold       = dangleThreshold;
            DanglingLinksPurger          = new DanglingLinksPurger(dangleThreshold);
            RedundantPathLengthThreshold = redundantThreshold;
            RedundantPathsPurger         = new RedundantPathsPurger(redundantThreshold);
            UnDangleGraph();
            RemoveRedundancy();

            IList <ISequence> contigs = BuildContigs();

            CloneLibrary.Instance.AddLibrary("abc", (float)5, (float)20);
            GraphScaffoldBuilder scaffold    = new GraphScaffoldBuilder();
            IList <ISequence>    scaffoldSeq = scaffold.BuildScaffold(
                sequences, contigs, this.KmerLength, 3, 0);

            // Expected value goes first so a failure message reports the values the right way round.
            Assert.AreEqual(8, scaffoldSeq.Count);

            // Comparing the strings directly makes a failure show the actual sequence.
            Assert.AreEqual("ATGCCTCCTATCTTAGCGCGC", scaffoldSeq[0].ToString());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds the graph for the tiny read set with a k-mer length of 3 and checks the
        /// node count, the presence of each expected k-mer (in either orientation) and
        /// the summed extension count.
        /// </summary>
        public void TestDeBruijnGraphBuilderTiny()
        {
            const int tinyKmerLength = 3;
            List<ISequence> tinyReads = TestInputs.GetTinyReads();

            this.KmerLength = tinyKmerLength;
            this.SequenceReads.Clear();
            this.SetSequenceReads(tinyReads);

            this.CreateGraph();
            DeBruijnGraph builtGraph = this.Graph;

            Assert.AreEqual(9, builtGraph.NodeCount);

            IEnumerable<string> nodeTexts = builtGraph.GetNodes().Select(
                n => new string(builtGraph.GetNodeSequence(n).Select(symbol => (char)symbol).ToArray()));
            HashSet<string> nodeStrings = new HashSet<string>(nodeTexts);

            // Each row holds a k-mer and its reverse complement; one of the two must be a node.
            string[,] expectedKmerPairs =
            {
                { "ATG", "CAT" },
                { "TGC", "GCA" },
                { "GCC", "GGC" },
                { "TCC", "GGA" },
                { "CCT", "AGG" },
                { "CTA", "TAG" },
                { "TAT", "ATA" },
                { "ATC", "GAT" },
                { "CTC", "GAG" }
            };

            for (int row = 0; row < expectedKmerPairs.GetLength(0); row++)
            {
                Assert.IsTrue(
                    nodeStrings.Contains(expectedKmerPairs[row, 0])
                    || nodeStrings.Contains(expectedKmerPairs[row, 1]));
            }

            long totalEdges = builtGraph.GetNodes().Sum(n => n.ExtensionsCount);

            Assert.AreEqual(31, totalEdges);
        }
        /// <summary>
        /// Removes every node contained in the given paths from the graph, after
        /// detaching those nodes from the neighbors that are going to stay.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn graph.</param>
        /// <param name="nodesList">Paths whose nodes are to be deleted.</param>
        /// <exception cref="ArgumentNullException"><paramref name="nodesList"/> is null.</exception>
        public void RemoveErroneousNodes(DeBruijnGraph deBruijnGraph, DeBruijnPathList nodesList)
        {
            DeBruijnGraph.ValidateGraph(deBruijnGraph);
            if (nodesList == null)
            {
                throw new ArgumentNullException("nodesList");
            }

            this.graph = deBruijnGraph;

            // Flatten all paths into one set of nodes scheduled for removal.
            var pathNodes = nodesList.Paths.AsParallel().SelectMany(path => path.PathNodes);
            HashSet<DeBruijnNode> doomedNodes = new HashSet<DeBruijnNode>(pathNodes);

            // The doomed node's own extensions are only read here; the removal is applied
            // to the surviving neighbor through its thread-safe method.
            Parallel.ForEach(doomedNodes, doomed =>
            {
                foreach (DeBruijnNode neighbor in doomed.GetExtensionNodes())
                {
                    // A neighbor that is deleted as well does not need updating.
                    if (doomedNodes.Contains(neighbor))
                    {
                        continue;
                    }

                    neighbor.RemoveExtensionThreadSafe(doomed);
                }
            });

            // Finally drop the nodes themselves.
            this.graph.RemoveNodes(doomedNodes);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Finds nodes lying on redundant paths.
        /// Every node with more than one right or more than one left extension is used
        /// as a starting point for tracing its diverging extension paths. The traced
        /// path groups are de-duplicated and handed to DetachBestPath, whose result is
        /// returned as the list of paths to remove.
        /// NOTE(review): the earlier documentation stated that this method and its
        /// callees only read the graph and need no locking - confirm against the callees.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn Graph.</param>
        /// <returns>List of path nodes to be deleted.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="deBruijnGraph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph deBruijnGraph)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            DeBruijnGraph.ValidateGraph(deBruijnGraph);
            this.graph = deBruijnGraph;

            List<DeBruijnPathList> tracedGroups = new List<DeBruijnPathList>();

            Parallel.ForEach(deBruijnGraph.GetNodes(), current =>
            {
                // Ambiguity is possible on the right side ...
                bool ambiguousOnRight = current.RightExtensionNodesCount > 1;
                if (ambiguousOnRight)
                {
                    TraceDivergingExtensionPaths(
                        current, current.GetRightExtensionNodesWithOrientation(), true, tracedGroups);
                }

                // ... and, independently, on the left side.
                bool ambiguousOnLeft = current.LeftExtensionNodesCount > 1;
                if (ambiguousOnLeft)
                {
                    TraceDivergingExtensionPaths(
                        current, current.GetLeftExtensionNodesWithOrientation(), false, tracedGroups);
                }
            });

            List<DeBruijnPathList> uniqueGroups = RemoveDuplicates(tracedGroups);
            return DetachBestPath(uniqueGroups);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Records a reference genome position on every De Bruijn node whose k-mer occurs
        /// at exactly one position in the reference genome, then reports painting
        /// statistics through RaiseMessage. K-mers that occur at several reference
        /// positions are counted as skipped.
        /// </summary>
        /// <exception cref="Exception">
        /// The position stored on a node is negative after being narrowed to a 16-bit value.
        /// </exception>
        /// <exception cref="InvalidProgramException">
        /// More than 95% of the counted k-mers were skipped.
        /// </exception>
        protected void PaintKmersWithReference()
        {
            List<int> missingLocs = new List<int>();
            var refKmerPositions = SequenceToKmerBuilder.BuildKmerDictionary(ReferenceGenome.ReferenceSequence, this.KmerLength);
            int kmersPainted = 0;
            int kmersSkipped = 0;
            DeBruijnGraph graph = this.Graph;
            long totalNodes = graph.NodeCount;

            foreach (var entry in refKmerPositions)
            {
                ISequence seq = entry.Key;
                IList<long> locations = entry.Value;

                // Only k-mers with a single reference location can be painted unambiguously.
                if (locations.Count != 1)
                {
                    kmersSkipped += locations.Count;
                    continue;
                }

                var kmerData = new KmerData32();
                kmerData.SetKmerData(seq, 0, this.KmerLength);
                DeBruijnNode matchingNode = this.Graph.KmerManager.SetNewOrGetOld(kmerData, false);
                if (matchingNode == null)
                {
                    missingLocs.Add((int)locations[0]);
                    continue;
                }

                matchingNode.ReferenceGenomePosition = (short)locations[0];
                kmersPainted++;

                // A negative value after the 16-bit narrowing means the position did not fit.
                if (matchingNode.ReferenceGenomePosition < 0)
                {
                    throw new Exception(
                        "Reference genome position " + locations[0] +
                        " is negative after narrowing to a 16-bit value.");
                }
            }

            // Diagnostic dump is deliberately disabled by the constant 'false'.
            if (false && OutputDiagnosticInformation)
            {
                // 'using' guarantees the file handle is released even if a write fails.
                using (StreamWriter sw = new StreamWriter("OutMissing.csv"))
                {
                    foreach (int i in missingLocs)
                    {
                        sw.WriteLine(i.ToString());
                    }
                }
            }

            double percentKmersSkipped = 100.0 * (kmersSkipped) / ((double)(kmersPainted + kmersSkipped));

            if (percentKmersSkipped > 95.0)
            {
                throw new InvalidProgramException("Reference Genome Skipped over 95% of Kmers");
            }

            double percentHit = kmersPainted / (double)refKmerPositions.Count;

            RaiseMessage("A total of " + (100.0 * percentHit).ToString() + "% nodes in the reference were painted");
            PercentNodesPainted = 100.0 * kmersPainted / (double)totalNodes;
            RaiseMessage(PercentNodesPainted.ToString("n2") + " % of nodes painted, for a total of " + kmersPainted.ToString() + " painted.");
            RaiseMessage(percentKmersSkipped.ToString("n2") + " % of Kmers were skipped for being in multiple locations");
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Builds contig sequences from the graph: validates it, excludes ambiguous
 /// extensions, purges invalid extensions on every node and returns the simple paths.
 /// The coverage threshold field is set to -1 for this run.
 /// </summary>
 /// <param name="graph">De Bruijn graph</param>
 /// <returns>List of contig data</returns>
 public IList<ISequence> Build(DeBruijnGraph graph)
 {
     _coverageThreshold = -1;
     _graph = graph;

     DeBruijnGraph.ValidateGraph(_graph);
     ExcludeAmbiguousExtensions();

     var allNodes = _graph.Nodes.AsParallel();
     allNodes.ForAll(node => node.PurgeInvalidExtensions());

     IList<ISequence> contigs = GetSimplePaths();
     return contigs;
 }
Ejemplo n.º 10
0
 /// <summary>
 /// Stores the assembly graph, creates the meta nodes and verifies that
 /// every graph node has been visited afterwards.
 /// </summary>
 /// <param name="assemblyGraph">De Bruijn graph to generate meta nodes from.</param>
 /// <exception cref="Exception">At least one node is not flagged as visited.</exception>
 public GraphGenerator(DeBruijnGraph assemblyGraph)
 {
     this._graph = assemblyGraph;
     CreateMetaNodes();

     // Sanity check: no node may be left unvisited.
     bool everyNodeVisited = _graph.GetNodes().All(node => node.IsVisited);
     if (!everyNodeVisited)
     {
         throw new Exception("Failed to visit all nodes!");
     }
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Detect nodes that are part of dangling links.
        /// Graph nodes are scanned in parallel and the links found are pushed into a
        /// blocking collection, which a separate task turns into the result list via GetPaths.
        /// NOTE(review): the earlier documentation stated that this method and its
        /// callees only read the graph and need no locking - confirm against the callees.
        /// </summary>
        /// <param name="deBruijnGraph">Input graph.</param>
        /// <returns>List of nodes in dangling links.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="deBruijnGraph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph deBruijnGraph)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            BlockingCollection <DeBruijnPath> debruijnPaths = new BlockingCollection <DeBruijnPath>();

            DeBruijnPathList danglingNodesList = null;
            Task             collectionTask    = Task.Factory.StartNew(() =>
            {
                // NOTE(review): GetPaths is assumed to consume the collection until
                // CompleteAdding is signalled - confirm against its implementation.
                danglingNodesList = new DeBruijnPathList(this.GetPaths(debruijnPaths));
            });

            try
            {
                Parallel.ForEach(
                    deBruijnGraph.GetNodes(),
                    (node) =>
                {
                    if (node.ExtensionsCount == 0)
                    {
                        // Single node island
                        debruijnPaths.Add(new DeBruijnPath(node));
                    }
                    else if (node.RightExtensionNodesCount == 0)
                    {
                        // End of possible dangling link
                        // Trace back to see if it is part of a dangling link
                        var link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                        if (link != null)
                        {
                            debruijnPaths.Add(link);
                        }
                    }
                    else if (node.LeftExtensionNodesCount == 0)
                    {
                        // End of possible dangling link
                        // Trace back to see if it is part of a dangling link
                        var link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                        if (link != null)
                        {
                            debruijnPaths.Add(link);
                        }
                    }
                });
            }
            finally
            {
                // Signal completion even when the scan throws; otherwise the collecting
                // task would keep waiting for items that never arrive.
                debruijnPaths.CompleteAdding();
            }

            Task.WaitAll(collectionTask);

            return(danglingNodesList);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Creates a new graph and builds the contig graph in it from the given
        /// contigs, using the k-mer length stored on this instance.
        /// </summary>
        /// <param name="contigs">List of contig sequences.</param>
        /// <returns>Contig Graph.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="contigs"/> is null.</exception>
        protected DeBruijnGraph GenerateContigOverlapGraph(IList<ISequence> contigs)
        {
            if (contigs != null)
            {
                DeBruijnGraph overlapGraph = new DeBruijnGraph();
                overlapGraph.BuildContigGraph(contigs, _kmerLength);
                return overlapGraph;
            }

            throw new ArgumentNullException("contigs");
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Detect nodes that are part of dangling links.
        /// Nodes are scanned in parallel; isolated nodes, non-empty links traced from
        /// nodes lacking right or left extensions, and self-referencing nodes with a
        /// single extension are collected.
        /// NOTE(review): the earlier documentation stated that this method and its
        /// callees only read the graph and need no locking - confirm against the callees.
        /// </summary>
        /// <param name="deBruijnGraph">Input graph.</param>
        /// <returns>List of nodes in dangling links.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="deBruijnGraph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph deBruijnGraph)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            ConcurrentBag<DeBruijnPath> foundPaths = new ConcurrentBag<DeBruijnPath>();

            Parallel.ForEach(deBruijnGraph.GetNodes(), current =>
            {
                // Node without any extension: a single node island.
                if (current.ExtensionsCount == 0)
                {
                    foundPaths.Add(new DeBruijnPath(current));
                    return;
                }

                // No right extensions: possible end of a dangling link, trace it back.
                if (current.RightExtensionNodesCount == 0)
                {
                    var tracedLink = TraceDanglingExtensionLink(false, new DeBruijnPath(), current, true);
                    if (tracedLink != null && tracedLink.PathNodes.Count > 0)
                    {
                        foundPaths.Add(tracedLink);
                    }

                    return;
                }

                // No left extensions: same check in the other direction.
                if (current.LeftExtensionNodesCount == 0)
                {
                    var tracedLink = TraceDanglingExtensionLink(true, new DeBruijnPath(), current, true);

                    // When the first node is below the threshold it is not added, which
                    // leaves a link without nodes; such links are ignored.
                    if (tracedLink != null && tracedLink.PathNodes.Count > 0)
                    {
                        foundPaths.Add(tracedLink);
                    }

                    return;
                }

                // Self-referencing node with exactly one extension.
                if (current.ContainsSelfReference && current.ExtensionsCount == 1)
                {
                    foundPaths.Add(new DeBruijnPath(current));
                }
            });

            return new DeBruijnPathList(foundPaths);
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Builds contig sequences from the graph: validates it, excludes ambiguous
 /// extensions, purges invalid extensions on every node in parallel and returns
 /// the simple paths. The coverage threshold field is set to NaN for this run.
 /// </summary>
 /// <param name="deBruijnGraph">De Bruijn graph.</param>
 /// <returns>List of contig data.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="deBruijnGraph"/> is null.</exception>
 public IEnumerable<ISequence> Build(DeBruijnGraph deBruijnGraph)
 {
     if (deBruijnGraph == null)
     {
         throw new ArgumentNullException("deBruijnGraph");
     }

     this._coverageThreshold = Double.NaN;
     this._graph = deBruijnGraph;

     DeBruijnGraph.ValidateGraph(deBruijnGraph);
     this.ExcludeAmbiguousExtensions();

     Parallel.ForEach(_graph.GetNodes(), node => node.PurgeInvalidExtensions());

     IEnumerable<ISequence> contigs = this.GetSimplePaths(true);
     return contigs;
 }
 /// <summary>
 /// Creates the graph generator for the given graph, writes its dot graph to
 /// "&lt;Prefix&gt;Graph.dot" and attempts an assembly when meta nodes exist.
 /// Without meta nodes the assembly is flagged as unsuccessful and not forming a loop.
 /// </summary>
 /// <param name="graph">De Bruijn graph to assemble from.</param>
 /// <param name="Prefix">Prefix prepended to the dot graph file name.</param>
 public MitochondrialAssembly(DeBruijnGraph graph, string Prefix)
 {
     gg = new GraphGenerator(graph);
     gg.OutputDotGraph(Prefix + "Graph.dot");

     // Nothing to assemble without meta nodes.
     if (gg.MetaNodes.Count <= 0)
     {
         SuccessfulAssembly = false;
         FormsCompleteLoop = false;
         return;
     }

     attemptToCreateAssembly();
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Detect nodes that are part of dangling links.
        /// The graph nodes are copied to an array, split into index ranges (about one per
        /// processor) and each range is scanned for isolated nodes and for links traced
        /// from nodes lacking right or left extensions.
        /// NOTE(review): the earlier documentation stated that this method and its
        /// callees only read the graph and need no locking - confirm against the callees.
        /// </summary>
        /// <param name="graph">Input graph</param>
        /// <returns>List of nodes in dangling links; an empty list for a graph without nodes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph graph)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            DeBruijnNode[] graphNodesArray = graph.Nodes.ToArray();

            // Partitioner.Create rejects an empty index range and a zero range size, so a
            // graph without nodes is answered directly. The empty sequence is passed as a
            // parallel query to use the same constructor overload as the regular path.
            if (graphNodesArray.Length == 0)
            {
                return new DeBruijnPathList(Enumerable.Empty<DeBruijnPath>().AsParallel());
            }

            // Integer ceiling division; computed in long so the sum cannot overflow and
            // no precision is lost for very large arrays.
            int processorCount = Environment.ProcessorCount;
            int rangeSize      = (int)(((long)graphNodesArray.Length + processorCount - 1) / processorCount);

            DeBruijnPathList danglingNodesList = new DeBruijnPathList(
                Partitioner.Create(0, graphNodesArray.Length, rangeSize).AsParallel().SelectMany(chunk =>
            {
                List <DeBruijnPath> danglingLinks = new List <DeBruijnPath>();
                for (int i = chunk.Item1; i < chunk.Item2; i++)
                {
                    DeBruijnNode node = graphNodesArray[i];
                    if (node.ExtensionsCount == 0)
                    {
                        // Single node island
                        danglingLinks.Add(new DeBruijnPath(node));
                    }
                    else if (node.RightExtensionNodes.Count == 0)
                    {
                        // End of possible dangling link
                        // Traceback to see if it is part of a dangling link
                        var link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                        if (link != null)
                        {
                            danglingLinks.Add(link);
                        }
                    }
                    else if (node.LeftExtensionNodes.Count == 0)
                    {
                        // End of possible dangling link
                        // Traceback to see if it is part of a dangling link
                        var link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                        if (link != null)
                        {
                            danglingLinks.Add(link);
                        }
                    }
                }
                return(danglingLinks);
            }));

            return(danglingNodesList);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Converts the scaffold path into its sequence.
        /// The first path entry seeds the sequence and is removed from this path; each
        /// remaining entry contributes the part of its node sequence that lies beyond the
        /// overlap, appended or prepended depending on the first entry's orientation.
        /// Every node used is marked.
        /// </summary>
        /// <param name="graph">De Bruijn graph.</param>
        /// <param name="kmerLength">Kmer Length.</param>
        /// <returns>Scaffold Sequence.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null.</exception>
        public ISequence BuildSequenceFromPath(DeBruijnGraph graph, int kmerLength)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            KeyValuePair<DeBruijnNode, DeBruijnEdge> head = this[0];
            DeBruijnNode firstNode = head.Key;
            bool growForward = head.Value.IsSameOrientation;

            firstNode.MarkNode();
            ISequence result = new Sequence(Alphabets.DNA);
            result.InsertRange(0, graph.GetNodeSequence(firstNode).ToString());
            this.RemoveAt(0);

            // Adjacent contigs overlap by (k-1) symbols.
            int overlap = kmerLength > 1 ? kmerLength - 1 : kmerLength;

            bool keepsOrientation = true;

            foreach (KeyValuePair<DeBruijnNode, DeBruijnEdge> step in this)
            {
                // Orientation is kept only when it agrees with the edge's flag.
                keepsOrientation = keepsOrientation == step.Value.IsSameOrientation;

                ISequence contig = graph.GetNodeSequence(step.Key);
                if (!keepsOrientation)
                {
                    contig = contig.ReverseComplement;
                }

                // Only the symbols beyond the overlap extend the scaffold.
                long extensionLength = contig.Count - overlap;
                if (growForward)
                {
                    result.InsertRange(result.Count, contig.Range(overlap, extensionLength).ToString());
                }
                else
                {
                    result.InsertRange(0, contig.Range(0, extensionLength).ToString());
                }

                step.Key.MarkNode();
            }

            return result;
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Builds contigs from the graph using the given coverage threshold and removes
        /// all graph nodes that are marked afterwards.
        /// </summary>
        /// <param name="graph">DeBruijn Graph</param>
        /// <param name="coverageThreshold">Coverage Threshold for contigs</param>
        /// <returns>Number of nodes removed</returns>
        /// <exception cref="ArgumentException"><paramref name="coverageThreshold"/> is not positive.</exception>
        public int RemoveLowCoverageContigs(DeBruijnGraph graph, double coverageThreshold)
        {
            if (coverageThreshold <= 0)
            {
                throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
            }

            _graph = graph;
            _coverageThreshold = coverageThreshold;

            DeBruijnGraph.ValidateGraph(_graph);
            ExcludeAmbiguousExtensions();

            _graph.Nodes.AsParallel().ForAll(node => node.ComputeValidExtensions());

            // Called for its effect on the nodes; the returned paths are not needed here.
            GetSimplePaths();

            _graph.Nodes.AsParallel().ForAll(node => node.UndoAmbiguousExtensions());

            int removedCount = _graph.Nodes.RemoveWhere(node => node.IsMarked());
            return removedCount;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Traverses the contig overlap graph to generate scaffold paths.
        /// Every graph node whose sequence has an entry in the mate pair map is
        /// traversed in parallel and the resulting paths are gathered in one list.
        /// </summary>
        /// <param name="graph">Contig Overlap Graph.</param>
        /// <param name="contigPairedReadMaps">InterContig Distances.</param>
        /// <param name="kmerLength">Length of Kmer</param>
        /// <param name="depth">Depth to which graph is searched.</param>
        /// <returns>List of paths/scaffold</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="graph"/> or <paramref name="contigPairedReadMaps"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="kmerLength"/> or <paramref name="depth"/> is not positive.
        /// </exception>
        public IList<ScaffoldPath> FindPaths(
            DeBruijnGraph graph,
            ContigMatePairs contigPairedReadMaps,
            int kmerLength,
            int depth = 10)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            if (contigPairedReadMaps == null)
            {
                throw new ArgumentNullException("contigPairedReadMaps");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Resource.KmerLength);
            }

            if (depth <= 0)
            {
                throw new ArgumentException(Resource.Depth);
            }

            _graph      = graph;
            _kmerLength = kmerLength;
            _depth      = depth;

            List<ScaffoldPath> collectedPaths = new List<ScaffoldPath>();

            Parallel.ForEach(_graph.Nodes, (DeBruijnNode current) =>
            {
                Dictionary<ISequence, IList<ValidMatePair>> matePairsOfContig;
                bool hasMatePairs = contigPairedReadMaps.TryGetValue(
                    graph.GetNodeSequence(current), out matePairsOfContig);
                if (!hasMatePairs)
                {
                    return;
                }

                List<ScaffoldPath> pathsFromNode = TraverseGraph(current, matePairsOfContig);

                // The result list is shared by all workers, so appends are serialized.
                lock (collectedPaths)
                {
                    collectedPaths.AddRange(pathsFromNode);
                }
            });

            return collectedPaths;
        }
        /// <summary>
        /// Marks every node whose k-mer count is below CoverageCutOff for deletion, lets
        /// every node remove its marked extensions and finally removes the marked nodes
        /// from the graph. On the Mac platform the two passes run sequentially, on all
        /// other platforms in parallel.
        /// NOTE(review): the reason for the sequential Mac path is not visible here.
        /// </summary>
        /// <param name="graph">De Bruijn Graph.</param>
        public void RemoveLowCoverageNodes(DeBruijnGraph graph)
        {
            DeBruijnGraph.ValidateGraph(graph);

            bool runningOnMac =
                Bio.CrossPlatform.Environment.GetRunningPlatform() == Bio.CrossPlatform.Environment.Platform.Mac;

            if (runningOnMac)
            {
                // Pass 1: mark low coverage nodes.
                foreach (var candidate in graph.GetNodes())
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                }

                // Pass 2: detach the marked nodes from every node's extensions.
                foreach (var current in graph.GetNodes())
                {
                    current.RemoveMarkedExtensions();
                }
            }
            else
            {
                ParallelOptions markingOptions = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                };

                // Pass 1: mark low coverage nodes.
                Parallel.ForEach(graph.GetNodes(), markingOptions, candidate =>
                {
                    if (candidate.KmerCount < CoverageCutOff)
                    {
                        candidate.MarkNodeForDelete();
                    }
                });

                // Pass 2: detach the marked nodes from every node's extensions.
                Parallel.ForEach(graph.GetNodes(), current => current.RemoveMarkedExtensions());
            }

            // All references to the marked nodes are gone, so they can simply be dropped.
            graph.RemoveMarkedNodes();
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Releases the fields held by this instance: disposes the scaffold builder
        /// when one exists and clears all references.
        /// </summary>
        /// <param name="disposeManaged">True to release the fields; false does nothing.</param>
        protected virtual void Dispose(bool disposeManaged)
        {
            if (!disposeManaged)
            {
                return;
            }

            // The scaffold builder is the only field disposed explicitly here.
            if (this.scaffoldBuilder != null)
            {
                this.scaffoldBuilder.Dispose();
            }

            this.scaffoldBuilder      = null;
            this.contigBuilder        = null;
            this.redundantPathsPurger = null;
            this.danglingLinksPurger  = null;
            this.sequenceReads        = null;
            this.graph                = null;
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Removes all marked nodes from the graph. When at least one node was removed,
        /// each remaining node is asked to remove its marked extensions, and the nodes that
        /// had no left or no right extensions before that update, or have none after it,
        /// are handed back so the caller can check them for dangling links.
        /// </summary>
        /// <param name="graph">De Bruijn Graph</param>
        /// <param name="graphNodes">
        /// Out parameter. List of graph nodes to check for dangling links;
        /// null when no node was removed.
        /// </param>
        private static void RemoveErodedNodes(DeBruijnGraph graph, out ICollection<DeBruijnNode> graphNodes)
        {
            int removedCount = graph.Nodes.RemoveWhere(node => node.IsMarked());

            if (removedCount <= 0)
            {
                graphNodes = null;
                return;
            }

            graphNodes = graph.Nodes
                .AsParallel()
                .Where(current =>
                {
                    bool endPointBefore = current.LeftExtensionNodes.Count == 0
                                          || current.RightExtensionNodes.Count == 0;

                    current.RemoveMarkedExtensions();

                    // Keep nodes that already were, or have just become, end points.
                    return endPointBefore
                           || current.LeftExtensionNodes.Count == 0
                           || current.RightExtensionNodes.Count == 0;
                })
                .ToList();
        }
        /// <summary>
        /// Calls indels from the indel paths found in the graph, then hands the result of
        /// DetachBestPath to RemoveErroneousNodes to clean the graph.
        /// </summary>
        /// <param name="deBruijnGraph">De Bruijn graph to analyze. It is also stored on this instance.</param>
        /// <returns>The indels called from every indel path.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="deBruijnGraph"/> is null.</exception>
        public List <ContinuousFrequencyIndelGenotype> CallAndRemoveIndels(DeBruijnGraph deBruijnGraph)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            this.graph = deBruijnGraph;
            var candidatePaths = GetIndelPaths(this.graph);

            // Materialize the calls now, before the clean-up below runs.
            var calledIndels = candidatePaths
                .SelectMany(path => IndelPathCollection.CallIndelsFromPathCollection(path, deBruijnGraph))
                .ToList();

            // Clean the indels out of the graph.
            // Borrowed from the redundant path purger; the two should be merged later.
            var pathsToRemove = DetachBestPath(candidatePaths);
            RemoveErroneousNodes(deBruijnGraph, pathsToRemove);

            return calledIndels;
        }
Ejemplo n.º 24
0
 /// <summary>
 /// Builds contigs from the graph and removes the graph nodes belonging to
 /// contigs whose coverage is below the given threshold.
 /// </summary>
 /// <param name="deBruijnGraph">DeBruijn Graph. Must not be null.</param>
 /// <param name="coverageThresholdForContigs">Coverage threshold for contigs. Values less than or equal to zero are rejected.</param>
 /// <returns>Number of nodes removed, as reported by the graph.</returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="deBruijnGraph"/> is null.</exception>
 /// <exception cref="ArgumentException">Thrown when <paramref name="coverageThresholdForContigs"/> is less than or equal to zero.</exception>
 public long RemoveLowCoverageContigs(DeBruijnGraph deBruijnGraph, double coverageThresholdForContigs)
 {
     if (deBruijnGraph == null)
     {
         throw new ArgumentNullException("deBruijnGraph");
     }

     if (coverageThresholdForContigs <= 0)
     {
         throw new ArgumentException("For removing low coverage contigs, coverage threshold should be a positive number");
     }

     // Remember the inputs for the helper methods called below.
     this._graph             = deBruijnGraph;
     this._coverageThreshold = coverageThresholdForContigs;
     DeBruijnGraph.ValidateGraph(deBruijnGraph);

     // The steps below must run in exactly this order.
     this.ExcludeAmbiguousExtensions();
     Parallel.ForEach(deBruijnGraph.GetNodes(), node => { node.ComputeValidExtensions(); });
     this.GetSimplePaths(false);
     Parallel.ForEach(deBruijnGraph.GetNodes(), node => { node.UndoAmbiguousExtensions(); });

     long removedNodes = deBruijnGraph.RemoveMarkedNodes();
     return removedNodes;
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Builds one sequence per scaffold path, appends the sequence of every graph node
        /// that is not marked, and finally disposes the graph.
        /// </summary>
        /// <param name="contigGraph">Contig Overlap Graph. It is disposed before this method returns.</param>
        /// <param name="paths">Scaffold paths.</param>
        /// <returns>The sequences built from the paths plus the sequences of the unmarked nodes.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="contigGraph"/> or <paramref name="paths"/> is null.
        /// </exception>
        protected IList <ISequence> GenerateScaffold(
            DeBruijnGraph contigGraph,
            IList <ScaffoldPath> paths)
        {
            if (contigGraph == null)
            {
                throw new ArgumentNullException("contigGraph");
            }

            if (paths == null)
            {
                throw new ArgumentNullException("paths");
            }

            // Path sequences are fully materialized here, before the node query below is evaluated.
            List<ISequence> result = paths
                .AsParallel()
                .Select(path => path.BuildSequenceFromPath(contigGraph, _kmerLength))
                .ToList();

            // Lazy query: nodes that are still unmarked at the time AddRange enumerates it.
            IEnumerable<DeBruijnNode> unmarkedNodes = contigGraph.Nodes
                .AsParallel()
                .Where(node => !node.IsMarked());

            var unmarkedSequences = unmarkedNodes
                .AsParallel()
                .Select(node => contigGraph.GetNodeSequence(node));
            result.AddRange(unmarkedSequences);

            contigGraph.Dispose();
            return result;
        }
 /// <summary>
 /// Tries to extend deletion paths from every node of the graph and wraps each
 /// collected path in a <see cref="DeletionAnalysis"/> report.
 /// </summary>
 /// <param name="graph">De Bruijn graph to search. Must not be null.</param>
 /// <param name="assembly">Not used by this method.</param>
 /// <returns>
 /// One report per entry in PossibleDeletionPaths. The same list is also stored in DeletionReports.
 /// </returns>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> is null.</exception>
 public List <DeletionAnalysis> FindAllDeletions(DeBruijnGraph graph, MitochondrialAssembly assembly)
 {
     // Fail fast, before the static graph reference below is overwritten with null.
     if (graph == null)
     {
         throw new ArgumentNullException("graph");
     }

     LargeDeletionFinder.graph = graph;
     KmerLength = graph.KmerLength;
     //set all edges in the graph to not be visited
     graph.GetNodes().AsParallel().ForAll(x => x.ResetVisitState());
     // NOTE(review): PossibleDeletionPaths is only appended to here; confirm whether it
     // should be cleared when this method is called more than once.
     foreach (DeBruijnNode node in graph.GetNodes())
     {
         //starting from any unused edges in the network, make any/all paths one can
         //take
         try
         {
             PossibleDeletionPaths.AddRange(ExtendFromStartNode(node));
         }
         catch (Exception thrown) {
             // Best effort: a failure on one start node is reported and the scan continues.
             Console.WriteLine(thrown.Message);
         }
     }
     DeletionReports = PossibleDeletionPaths.Select(x => new DeletionAnalysis(x)).ToList();
     return(DeletionReports);
 }
 /// <summary>
 /// Removes every node that is still unvisited after walking the graph outwards
 /// from the reference nodes.
 /// </summary>
 /// <param name="graph">De Bruijn Graph.</param>
 /// <param name="referenceNodes">Nodes from which the connected part of the graph is visited.</param>
 public void RemoveUnconnectedNodes(DeBruijnGraph graph, IEnumerable <DeBruijnNode> referenceNodes)
 {
     // Strategy: visit everything connected to the reference nodes, then drop the rest.
     DeBruijnGraph.ValidateGraph(graph);

     // Start from a clean slate: no node counts as visited.
     graph.SetNodeVisitState(false);

     // Visit everything connected to the reference somehow.
     // This loop is expected to spend nearly all its time on the first node.
     foreach (DeBruijnNode referenceNode in referenceNodes)
     {
         if (!referenceNode.IsVisited)
         {
             visitAllConnectedNodes(referenceNode);
         }
     }

     // Flag whatever the walk never reached.
     var markOptions = new ParallelOptions()
     {
         MaxDegreeOfParallelism = Environment.ProcessorCount
     };
     Parallel.ForEach(graph.GetNodes(), markOptions, candidate =>
     {
         if (candidate.IsVisited)
         {
             return;
         }

         candidate.MarkNodeForDelete();
     });

     Parallel.ForEach(graph.GetNodes(), remaining => { remaining.RemoveMarkedExtensions(); });

     // The flagged nodes are not connected to anything being kept, so they can be
     // deleted without further changes to the graph structure.
     graph.RemoveMarkedNodes();
 }
Ejemplo n.º 28
0
        /// <summary>
        /// Walks from the start node through its first right extension until the walk returns
        /// to the start node, recording each node in ConstituentNodes and one symbol per step
        /// in contigSequence, then stores the resulting string in Sequence.
        /// </summary>
        /// <param name="startNode">Node at which the walk starts and must end.</param>
        /// <param name="graph">Graph supplying the k-mer length.</param>
        /// <exception cref="Exception">Thrown when a node on the walk is not classified as LINK_IN_CHAIN.</exception>
        private void MakeCircle(DeBruijnNode startNode, DeBruijnGraph graph)
        {
            CircularLoop = true;

            // Echo the start node's symbols to the console.
            byte[] startSymbols = startNode.GetOriginalSymbols(graph.KmerLength);
            Console.WriteLine((new Sequence(DnaAlphabet.Instance, startSymbols)).ToString());

            ConstituentNodes.Add(startNode);
            startNode.IsVisited = true;

            bool headingRight = true;
            Dictionary<DeBruijnNode, bool> candidates = startNode.GetRightExtensionNodesWithOrientation();
            var step = candidates.First();
            DeBruijnNode current = step.Key;

            while (current != startNode)
            {
                current.IsVisited = true;
                ConstituentNodes.Add(current);

                bool sameOrientation = step.Value;
                NODE_TYPE currentType = ClassifyNode(current);

                // The side we leave a node from is fixed by how we entered it, so the node
                // itself need not be inspected: keep the direction when the flags agree.
                headingRight = (headingRight == sameOrientation);

                if (currentType != NODE_TYPE.LINK_IN_CHAIN)
                {
                    throw new Exception("Non circular path being treated like one");
                }

                candidates = headingRight
                    ? current.GetRightExtensionNodesWithOrientation()
                    : current.GetLeftExtensionNodesWithOrientation();

                // Which base to take depends only on how this node was entered: either the
                // reverse complement of the first base or the last base.
                byte appended = GetNextSymbol(current, graph.KmerLength, !headingRight);
                contigSequence.Add(appended);

                step    = candidates.First();
                current = step.Key;
            }

            var assembled = new Sequence((IAlphabet)NoGapDnaAlphabet.Instance, contigSequence.ToArray());
            Sequence = assembled.ConvertToString(0, contigSequence.Count);
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Removes from the graph all nodes on the given paths, after RemoveLinkNodes
        /// has been run for each path.
        /// </summary>
        /// <param name="deBruijnGraph">Input graph.</param>
        /// <param name="nodesList">List of dangling link nodes.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="deBruijnGraph"/> or <paramref name="nodesList"/> is null.
        /// </exception>
        public void RemoveErroneousNodes(DeBruijnGraph deBruijnGraph, DeBruijnPathList nodesList)
        {
            if (deBruijnGraph == null)
            {
                throw new ArgumentNullException("deBruijnGraph");
            }

            if (nodesList == null)
            {
                throw new ArgumentNullException("nodesList");
            }

            // Collect the final node of every path up front; RemoveLinkNodes receives this set.
            HashSet<DeBruijnNode> pathEnds = new HashSet<DeBruijnNode>(
                nodesList.Paths.Select(path => path.PathNodes.Last()));

            // Lazy parallel query: each path has its links updated as it is enumerated,
            // and then contributes its nodes to the deletion set.
            var nodesToDelete = nodesList.Paths
                .AsParallel()
                .SelectMany(path =>
                {
                    RemoveLinkNodes(path, pathEnds);
                    return path.PathNodes;
                });

            deBruijnGraph.RemoveNodes(nodesToDelete);
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Releases the resources held by this instance.
        /// </summary>
        /// <param name="disposeManaged">
        /// When true, the graph and the scaffold builder are disposed and the references to the
        /// graph, reads, purgers and builders are cleared. When false, nothing is done.
        /// </param>
        protected virtual void Dispose(bool disposeManaged)
        {
            if (!disposeManaged)
            {
                return;
            }

            // Dispose the owned disposables first: graph, then scaffold builder.
            if (_graph != null)
            {
                _graph.Dispose();
            }

            if (_scaffoldBuilder != null)
            {
                _scaffoldBuilder.Dispose();
            }

            // Drop every reference so the objects become collectable.
            _scaffoldBuilder      = null;
            _contigBuilder        = null;
            _redundantPathsPurger = null;
            _danglingLinksPurger  = null;
            _sequenceReads        = null;
            _graph                = null;
        }