Exemple #1
0
        /// <summary>
        /// Tries to append a node to the contig path. A node that is already on the
        /// path is refused, because adding it would close a loop.
        /// On success the contig sequence grows by a single symbol taken from the node.
        /// </summary>
        /// <param name="contigPath">Graph nodes making up the contig path so far.</param>
        /// <param name="contigSequence">Sequence of the contig being assembled.</param>
        /// <param name="nextNode">Candidate node to be added to the path.</param>
        /// <param name="isForwardDirection">True to append the new symbol at the end of the contig, false to insert it at the front.</param>
        /// <param name="isSameOrientation">True to use the node sequence as stored, false to use its reverse complement.</param>
        /// <returns>True if the node was added; false if it was already on the path.</returns>
        private bool CheckAndAddNode(
            List<DeBruijnNode> contigPath,
            ISequence contigSequence,
            DeBruijnNode nextNode,
            bool isForwardDirection,
            bool isSameOrientation)
        {
            // Guard: the node is already on the path, so nothing is updated.
            if (contigPath.Contains(nextNode))
            {
                return false;
            }

            contigPath.Add(nextNode);

            // Pick the strand of the node sequence that matches the path orientation.
            ISequence nodeSequence = _graph.GetNodeSequence(nextNode);
            if (!isSameOrientation)
            {
                nodeSequence = nodeSequence.ReverseComplement;
            }

            // Only one symbol is contributed: the last one when growing forward,
            // the first one when growing backward.
            if (isForwardDirection)
            {
                contigSequence.Add(nodeSequence.Last());
            }
            else
            {
                contigSequence.Insert(0, nodeSequence.First());
            }

            return true;
        }
Exemple #2
0
        /// <summary>
        /// Classifies a node by the number of neighbors on each of its sides, for the purpose of
        /// creating a graph where links of singly joined nodes are combined.
        /// </summary>
        /// <param name="startNode">Node to classify.</param>
        /// <returns>
        /// END_LOOPS_ON_ITSELF when some neighbor appears on both the left and the right side; otherwise
        /// LINK_IN_CHAIN (exactly one neighbor on each side), GO_RIGHT (exactly one right neighbor, left count not one),
        /// GO_LEFT (exactly one left neighbor, right count not one), NEXUS (more than one neighbor on both sides),
        /// or ISLAND for every remaining combination.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="startNode"/> is null.</exception>
        public static NODE_TYPE ClassifyNode(DeBruijnNode startNode)
        {
            if (startNode == null)
            {
                throw new ArgumentNullException("startNode");
            }

            var lefts = startNode.GetLeftExtensionNodes().ToArray();
            var rights = startNode.GetRightExtensionNodes().ToArray();

            // This check must come first: a neighbor shared by both sides takes priority
            // over the count-based categories below.
            if (lefts.Any(x => rights.Contains(x)))
            {
                return NODE_TYPE.END_LOOPS_ON_ITSELF;
            }

            bool singleLeft = lefts.Length == 1;
            bool singleRight = rights.Length == 1;

            if (singleLeft && singleRight)
            {
                return NODE_TYPE.LINK_IN_CHAIN;
            }

            if (singleRight)
            {
                return NODE_TYPE.GO_RIGHT;
            }

            if (singleLeft)
            {
                return NODE_TYPE.GO_LEFT;
            }

            if (lefts.Length > 1 && rights.Length > 1)
            {
                return NODE_TYPE.NEXUS;
            }

            // Neither side has exactly one neighbor and at least one side has none.
            return NODE_TYPE.ISLAND;
        }
            /// <summary>
            /// Follows the paths leaving a node depth-first and returns the first neighbor found
            /// that has a reference genome location.
            /// </summary>
            /// <param name="node">Node whose neighbors are searched.</param>
            /// <param name="grabRightSide">True to leave the node through its right extensions, false to use the left extensions.</param>
            /// <param name="curDistance">How far removed from the original node we are at present.</param>
            /// <returns>
            /// The distance and reference position of the first in-reference neighbor reached,
            /// or null when no such neighbor can be reached.
            /// </returns>
            private DistanceLocation FollowNode(DeBruijnNode node, bool grabRightSide, int curDistance)
            {
                // Fully qualified so the block does not depend on a using directive outside of it.
                return FollowNodeCore(
                    node,
                    grabRightSide,
                    curDistance,
                    new System.Collections.Generic.HashSet<DeBruijnNode>(),
                    new System.Collections.Generic.HashSet<DeBruijnNode>());
            }

            /// <summary>
            /// Recursive worker for FollowNode. Remembers which (node, side) states were already expanded so
            /// that a cycle in the graph cannot cause unbounded recursion and shared sub-paths are searched once.
            /// </summary>
            /// <param name="node">Node whose neighbors are searched.</param>
            /// <param name="grabRightSide">True to leave the node through its right extensions.</param>
            /// <param name="curDistance">Distance of <paramref name="node"/> from the original start node.</param>
            /// <param name="exploredRight">Nodes already expanded through their right side.</param>
            /// <param name="exploredLeft">Nodes already expanded through their left side.</param>
            /// <returns>The first reference location found, or null.</returns>
            private DistanceLocation FollowNodeCore(
                DeBruijnNode node,
                bool grabRightSide,
                int curDistance,
                System.Collections.Generic.HashSet<DeBruijnNode> exploredRight,
                System.Collections.Generic.HashSet<DeBruijnNode> exploredLeft)
            {
                var explored = grabRightSide ? exploredRight : exploredLeft;
                if (!explored.Add(node))
                {
                    // This state is either still being expanded further up the call stack (a cycle)
                    // or was expanded before without success; both are dead ends here.
                    return null;
                }

                var nextNodes =
                    grabRightSide ? node.GetRightExtensionNodesWithOrientation() : node.GetLeftExtensionNodesWithOrientation();

                foreach (var neighbor in nextNodes)
                {
                    if (neighbor.Key.IsInReference)
                    {
                        return new DistanceLocation()
                        {
                            Distance = curDistance,
                            RefGenomeLocation = neighbor.Key.ReferenceGenomePosition
                        };
                    }

                    // The side to leave the neighbor from stays the same when the edge orientation flag
                    // is true and flips when it is false.
                    bool nextSideRight = !(neighbor.Value ^ grabRightSide);
                    var found = FollowNodeCore(neighbor.Key, nextSideRight, curDistance + 1, exploredRight, exploredLeft);
                    if (found != null)
                    {
                        return found;
                    }
                }

                return null;
            }
Exemple #4
0
        /// <summary>
        /// Tags De Bruijn nodes with reference genome positions. For every k-mer that occurs at exactly one
        /// position in the reference genome, the matching graph node (when the lookup returns one) gets that
        /// position assigned. K-mers occurring at several positions are skipped.
        /// Statistics are reported through RaiseMessage and the painted percentage is stored in PercentNodesPainted.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when a reference position is negative or too large for the 16-bit value written to the node.
        /// </exception>
        /// <exception cref="InvalidProgramException">
        /// Thrown when more than 95% of the counted k-mers were skipped for occurring at multiple positions.
        /// </exception>
        protected void PaintKmersWithReference()
        {
            List<int> missingLocs = new List<int>();
            var refKmerPositions = SequenceToKmerBuilder.BuildKmerDictionary(ReferenceGenome.ReferenceSequence, this.KmerLength);
            int kmersPainted = 0;
            int kmersSkipped = 0;
            long totalNodes = this.Graph.NodeCount;

            foreach (var entry in refKmerPositions)
            {
                ISequence seq = entry.Key;
                IList<long> locations = entry.Value;

                if (locations.Count != 1)
                {
                    // Ambiguous k-mer: it cannot identify a single reference position.
                    kmersSkipped += locations.Count;
                    continue;
                }

                long location = locations[0];

                var kmerData = new KmerData32();
                kmerData.SetKmerData(seq, 0, this.KmerLength);
                DeBruijnNode matchingNode = this.Graph.KmerManager.SetNewOrGetOld(kmerData, false);
                if (matchingNode == null)
                {
                    missingLocs.Add((int)location);
                    continue;
                }

                // Validate before narrowing: an unchecked (short) cast would wrap silently, and only
                // some of the wrapped values end up negative.
                if (location < 0 || location > short.MaxValue)
                {
                    throw new Exception(
                        "Reference genome position " + location.ToString() +
                        " is outside the supported range 0-" + short.MaxValue.ToString() + ".");
                }

                matchingNode.ReferenceGenomePosition = (short)location;
                kmersPainted++;
            }

            // Diagnostic dump, deliberately disabled by the constant 'false'.
            if (false && OutputDiagnosticInformation)
            {
                using (StreamWriter sw = new StreamWriter("OutMissing.csv"))
                {
                    foreach (int missing in missingLocs)
                    {
                        sw.WriteLine(missing.ToString());
                    }
                }
            }

            double percentKmersSkipped = 100.0 * (kmersSkipped) / ((double)(kmersPainted + kmersSkipped));

            if (percentKmersSkipped > 95.0)
            {
                throw new InvalidProgramException("Reference Genome Skipped over 95% of Kmers");
            }

            double percentHit = kmersPainted / (double)refKmerPositions.Count;

            RaiseMessage("A total of " + (100.0 * percentHit).ToString() + "% nodes in the reference were painted");
            PercentNodesPainted = 100.0 * kmersPainted / (double)totalNodes;
            RaiseMessage(PercentNodesPainted.ToString("n2") + " % of nodes painted, for a total of " + kmersPainted.ToString() + " painted.");
            RaiseMessage(percentKmersSkipped.ToString("n2") + " % of Kmers were skipped for being in multiple locations");
        }
Exemple #5
0
 /// <summary>
 /// Returns the weight of the connection between two adjacent nodes, which is the k-mer count of the second node.
 /// </summary>
 /// <param name="FirstNode">Node whose extensions must contain <paramref name="SecondNode"/>.</param>
 /// <param name="SecondNode">Node whose k-mer count is returned.</param>
 /// <returns>The k-mer count of <paramref name="SecondNode"/>.</returns>
 /// <exception cref="Exception">Thrown when the second node is not among the first node's extensions.</exception>
 public static uint CalculateConnectionWeight(DeBruijnNode FirstNode, DeBruijnNode SecondNode)
 {
     bool areNeighbors = FirstNode.GetExtensionNodes().Contains(SecondNode);
     if (areNeighbors)
     {
         return SecondNode.KmerCount;
     }

     // The nodes do not share an edge, so there is no connection to weigh.
     throw new Exception("Can't calculate non-overlapping extensions");
 }
        /// <summary>
        /// Traces a simple path in the specified direction, marking every node it reaches as visited and
        /// adding nodes to the contig path until the path ends or closes into a loop.
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Sequence of contig being assembled.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        /// <param name="sameOrientation">Path orientation.</param>
        /// <param name="node">Next node on the path.</param>
        /// <param name="createContigSequences">Indicates whether the contig sequences are to be created or not.</param>
        private void TraceSimplePathLinks(
            List<DeBruijnNode> contigPath,
            List<byte> contigSequence,
            bool isForwardDirection,
            bool sameOrientation,
            DeBruijnNode node,
            bool createContigSequences)
        {
            bool endFound = false;

            while (!endFound)
            {
                node.IsVisited = true;

                // Get extensions going in same directions.
                Dictionary<DeBruijnNode, bool> sameDirectionExtensions = (isForwardDirection ^ sameOrientation)
                    ? node.GetLeftExtensionNodesWithOrientation()
                    : node.GetRightExtensionNodesWithOrientation();

                if (sameDirectionExtensions.Count == 0)
                {
                    // Found end of path. Add this node and stop.
                    CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences);
                    endFound = true;
                }
                else if (!CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences))
                {
                    // The node was rejected, so the path has looped back onto itself and cannot be extended.
                    // Always stop here: nothing in this iteration changed 'node' or the path, so retrying
                    // would repeat the same rejected call forever. The expected case is a circle closing
                    // on contigPath[0], but a loop closing on any other node must terminate too.
                    endFound = true;
                }
                else
                {
                    // Continue the trace in the same direction through the first extension.
                    var sameDirectionExtension = sameDirectionExtensions.First();
                    node = sameDirectionExtension.Key;
                    sameOrientation = !(sameOrientation ^ sameDirectionExtension.Value);
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Detect nodes that are part of dangling links. The graph nodes are split into one range per
        /// processor and the ranges are scanned in parallel.
        /// Locks: Method only does reads. No locking necessary here or its callees.
        /// </summary>
        /// <param name="graph">Input graph.</param>
        /// <returns>
        /// List of dangling links: single-node islands plus every link traced back from a node that has
        /// no right or no left extensions. Empty when the graph has no nodes.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="graph"/> is null.</exception>
        public DeBruijnPathList DetectErroneousNodes(DeBruijnGraph graph)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            DeBruijnNode[] graphNodesArray = graph.Nodes.ToArray();

            // Partitioner.Create rejects an empty range and a zero range size, so an empty graph
            // is answered directly.
            if (graphNodesArray.Length == 0)
            {
                return new DeBruijnPathList(Enumerable.Empty<DeBruijnPath>());
            }

            // Integer ceiling division; the long intermediate rules out overflow, and unlike float
            // arithmetic it stays exact for very large node counts.
            int processorCount = Environment.ProcessorCount;
            int rangeSize = (int)(((long)graphNodesArray.Length + processorCount - 1) / processorCount);

            DeBruijnPathList danglingNodesList = new DeBruijnPathList(
                Partitioner.Create(0, graphNodesArray.Length, rangeSize).AsParallel().SelectMany(chunk =>
                {
                    List<DeBruijnPath> danglingLinks = new List<DeBruijnPath>();
                    for (int i = chunk.Item1; i < chunk.Item2; i++)
                    {
                        DeBruijnNode node = graphNodesArray[i];
                        if (node.ExtensionsCount == 0)
                        {
                            // Single node island
                            danglingLinks.Add(new DeBruijnPath(node));
                        }
                        else if (node.RightExtensionNodes.Count == 0)
                        {
                            // End of possible dangling link
                            // Traceback to see if it is part of a dangling link
                            var link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                            if (link != null)
                            {
                                danglingLinks.Add(link);
                            }
                        }
                        else if (node.LeftExtensionNodes.Count == 0)
                        {
                            // End of possible dangling link
                            // Traceback to see if it is part of a dangling link
                            var link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                            if (link != null)
                            {
                                danglingLinks.Add(link);
                            }
                        }
                    }

                    return danglingLinks;
                }));

            return danglingNodesList;
        }
        /// <summary>
        /// Enumerates the possible assemblies that can be grown outward from a start node.
        /// Chains are first extended through the right neighbors, then through the left neighbors.
        /// When both sides yield chains, every left/right pairing is combined into one assembly;
        /// when only one side yields chains, those chains are returned unchanged.
        /// </summary>
        /// <param name="start">Node from which the chains are extended.</param>
        /// <returns>The combined or one-sided assemblies; nothing when neither side yields a chain.</returns>
        public IEnumerable<PossibleAssembly> ExtendFromStartNode(DeBruijnNode start)
        {
            //TODO: I believe this handles figure 8s and palindromes just fine, should verify though.

            // Right side first.
            var rightChains = new List<PossibleAssembly>();
            foreach (var neighbor in start.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited(false))
            {
                rightChains.AddRange(ExtendChain(new PossibleAssembly(start, true), neighbor.Key, true, neighbor.Value));
            }

            // Then the left side.
            var leftChains = new List<PossibleAssembly>();
            foreach (var neighbor in start.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited(false))
            {
                leftChains.AddRange(ExtendChain(new PossibleAssembly(start, false), neighbor.Key, false, neighbor.Value));
            }

            bool hasLeft = leftChains.Count > 0;
            bool hasRight = rightChains.Count > 0;

            if (hasLeft && hasRight)
            {
                // Combine every right chain with every left chain.
                foreach (var rightChain in rightChains)
                {
                    foreach (var leftChain in leftChains)
                    {
                        yield return new PossibleAssembly(leftChain, rightChain);
                    }
                }

                yield break;
            }

            // At most one side produced chains; if neither did, the chosen list is empty.
            var oneSided = hasLeft ? leftChains : rightChains;
            foreach (var chain in oneSided)
            {
                yield return chain;
            }
        }
Exemple #9
0
        /// <summary>
        /// Creates a path that starts with two nodes and carries the given orientation.
        /// </summary>
        /// <param name="node1">First node of the path.</param>
        /// <param name="node2">Second node of the path.</param>
        /// <param name="orientation">Value stored as the path's same-orientation flag.</param>
        /// <exception cref="ArgumentNullException">Thrown when either node is null.</exception>
        public PathWithOrientation(DeBruijnNode node1, DeBruijnNode node2, bool orientation)
        {
            if (node1 == null)
            {
                throw new ArgumentNullException("node1");
            }

            if (node2 == null)
            {
                throw new ArgumentNullException("node2");
            }

            var initialNodes = new List<DeBruijnNode>();
            initialNodes.Add(node1);
            initialNodes.Add(node2);

            this.nodes = initialNodes;
            this.IsSameOrientation = orientation;
        }
Exemple #10
0
        /// <summary>
        /// Detaches a dangling link from the rest of the graph by removing, from each neighbor of the
        /// link's last node, the extension that points to that last node.
        /// Parallelization Note: Locks required here. We are modifying graph structure here.
        /// </summary>
        /// <param name="nodes">Nodes of a single dangling link.</param>
        /// <param name="lastNodes">Set of all nodes occurring at end of dangling links; these neighbors are left untouched.</param>
        private static void RemoveLinkNodes(DeBruijnPath nodes, HashSet<DeBruijnNode> lastNodes)
        {
            // Only the last element of the link can have extensions that are valid parts of the graph.
            DeBruijnNode linkEnd = nodes.PathNodes.Last();

            // Neighbors that are themselves link ends are skipped to avoid updating another
            // link end's dictionary, which reduces conflicts.
            var neighborsToUpdate = linkEnd.GetExtensionNodes().Where(neighbor => !lastNodes.Contains(neighbor));

            foreach (DeBruijnNode neighbor in neighborsToUpdate)
            {
                neighbor.RemoveExtensionThreadSafe(linkEnd);
            }
        }
Exemple #11
0
        /// <summary>
        /// Traces the simple path that starts at 'node' in the given direction, then either marks the
        /// path for deletion (coverage threshold set and not met) or records its contig sequence
        /// (no coverage threshold set).
        /// </summary>
        /// <param name="assembledContigs">List that receives the assembled contig; also used as the lock object while adding.</param>
        /// <param name="node">Starting node of contig path.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        /// <param name="createContigSequences">Boolean indicating whether the contig sequences are to be created or not.</param>
        /// <param name="DuplicatesPossible">Boolean indicating if duplicates are possible, true if both the forward and reverse path could be generated.</param>
        private void TraceSimplePath(List<ISequence> assembledContigs, DeBruijnNode node, bool isForwardDirection, bool createContigSequences, bool DuplicatesPossible)
        {
            ISequence startSequence = this._graph.GetNodeSequence(node);
            List<byte> contigBytes = new List<byte>(startSequence);

            node.IsVisited = true;

            List<DeBruijnNode> path = new List<DeBruijnNode>();
            path.Add(node);

            var startExtensions = isForwardDirection
                ? node.GetRightExtensionNodesWithOrientation()
                : node.GetLeftExtensionNodesWithOrientation();
            KeyValuePair<DeBruijnNode, bool> firstLink = startExtensions.First();

            this.TraceSimplePathLinks(path, contigBytes, isForwardDirection, firstLink.Value, firstLink.Key, createContigSequences);

            // Duplicate suppression: when the same path can be produced from both ends, keep it only
            // when the first node's value does not compare below the last node's value.
            if (DuplicatesPossible && path[0].NodeValue.CompareTo(path.Last().NodeValue) < 0)
            {
                return;
            }

            // Definition from Velvet Manual: http://helix.nih.gov/Applications/velvet_manual.pdf
            // "k-mer coverage" is how many times a k-mer has been seen among the reads.
            double coverage = path.Average(n => n.KmerCount);

            if (!Double.IsNaN(_coverageThreshold))
            {
                // With a threshold set, this method only marks low-coverage paths; no contig is recorded.
                if (coverage < this._coverageThreshold)
                {
                    path.ForEach(n => n.MarkNodeForDelete());
                }

                return;
            }

            if (!createContigSequences)
            {
                return;
            }

            lock (assembledContigs)
            {
                var contig = new Sequence(startSequence.Alphabet, contigBytes.ToArray());
                contig.ID = " Avg K-Mer Coverage = " + coverage.ToString();
                assembledContigs.Add(contig);
            }
        }
Exemple #12
0
        /// <summary>
        /// Walks a simple path in the given direction, adding each node to the contig path until the
        /// path runs out of extensions or a node is rejected because it closes a loop.
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Sequence of contig being assembled.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        /// <param name="sameOrientation">Path orientation.</param>
        /// <param name="node">Next node on the path.</param>
        /// <param name="createContigSequences">Indicates whether the contig sequences are to be created or not.</param>
        private void TraceSimplePathLinks(
            List<DeBruijnNode> contigPath,
            List<byte> contigSequence,
            bool isForwardDirection,
            bool sameOrientation,
            DeBruijnNode node,
            bool createContigSequences)
        {
            while (true)
            {
                // Extensions that continue the walk in the same direction.
                Dictionary<DeBruijnNode, bool> onwardExtensions;
                if (isForwardDirection ^ sameOrientation)
                {
                    onwardExtensions = node.GetLeftExtensionNodesWithOrientation();
                }
                else
                {
                    onwardExtensions = node.GetRightExtensionNodesWithOrientation();
                }

                if (onwardExtensions.Count == 0)
                {
                    // End of path: add the final node and finish.
                    this.CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences);
                    return;
                }

                var onward = onwardExtensions.First();

                if (!this.CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences))
                {
                    // Loop is found. Cannot extend simple path further.
                    return;
                }

                // Step to the next node; the orientation is kept when the edge flag is true
                // and flipped when it is false.
                node = onward.Key;
                sameOrientation = sameOrientation == onward.Value;
            }
        }
Exemple #13
0
 /// <summary>
 /// Enumerates the nodes reached by leaving this chain through its first constituent node.
 /// For a chain of several nodes the exit side is derived from how the second node connects to the first;
 /// for a single-node chain it is derived from whether the node's symbols match the stored sequence
 /// directly or as a reverse complement.
 /// </summary>
 /// <returns>The extension nodes on the outward side of the first constituent node.</returns>
 /// <exception cref="Exception">Thrown for a single-node chain whose symbols match the stored sequence in neither orientation.</exception>
 public IEnumerable<DeBruijnNode> GetNodesLeavingTop()
 {
     if (this.ConstituentNodes.Count <= 1)
     {
         var onlyNode = this.ConstituentNodes[0];
         Debug.Assert(KmerLength == Sequence.Length);
         var nodeSequence = new Sequence(DnaAlphabet.Instance, onlyNode.GetOriginalSymbols(MetaNode.KmerLength));

         // Work out whether the stored sequence reads the node forward or as its reverse complement.
         bool orientationRight = nodeSequence.ConvertToString().Equals(Sequence);
         if (!orientationRight)
         {
             var reverseComplement = new Sequence(nodeSequence.GetReverseComplementedSequence());
             if (!reverseComplement.ConvertToString().Equals(Sequence))
             {
                 throw new Exception("AAA");
             }
         }

         var outward = orientationRight ? onlyNode.GetLeftExtensionNodes() : onlyNode.GetRightExtensionNodes();
         foreach (var neighbor in outward)
         {
             yield return neighbor;
         }

         yield break;
     }

     // Chain is longer than one node: use the second node to find out how the first one is entered.
     var first = ConstituentNodes[0];
     DeBruijnNode second = ConstituentNodes[1];
     bool reachedViaLeft = second.GetLeftExtensionNodes().Contains(first);

     var linkToFirst = reachedViaLeft
         ? second.GetLeftExtensionNodesWithOrientation().First(x => x.Key == first)
         : second.GetRightExtensionNodesWithOrientation().First(x => x.Key == first);

     var beyondFirst = (reachedViaLeft ^ linkToFirst.Value)
         ? linkToFirst.Key.GetRightExtensionNodes()
         : linkToFirst.Key.GetLeftExtensionNodes();

     foreach (var neighbor in beyondFirst)
     {
         yield return neighbor;
     }
 }
Exemple #14
0
        /// <summary>
        /// Decides whether 'node' may join the dangling link and adds it when allowed.
        /// A low-count node at the very start of a link is only marked (erosion) and not added.
        /// A node already on the link ends the trace, and a link that has reached the length
        /// threshold is rejected by returning null.
        /// </summary>
        /// <param name="link">Dangling link being built.</param>
        /// <param name="node">Node to be added.</param>
        /// <param name="reachedErrorEndPoint">Set to true when the trace must stop at this node, false when it may continue.</param>
        /// <returns>The link (updated or unchanged), or null when the length threshold was reached.</returns>
        private DeBruijnPath CheckAndAddDanglingNode(DeBruijnPath link, DeBruijnNode node, out bool reachedErrorEndPoint)
        {
            bool erodeCandidate =
                _erodeThreshold != -1 &&
                link.PathNodes.Count == 0 &&
                node.KmerCount < _erodeThreshold;

            if (erodeCandidate)
            {
                // An already marked node means we came around a loop: stop here.
                // Otherwise mark it and let the trace continue. The link is unchanged either way.
                reachedErrorEndPoint = node.IsMarked();
                if (!reachedErrorEndPoint)
                {
                    node.MarkNode();
                }

                return link;
            }

            if (link.PathNodes.Contains(node))
            {
                // There is a loop in this link. The link is left as it is and the trace ends.
                reachedErrorEndPoint = true;
                return link;
            }

            if (link.PathNodes.Count >= _lengthThreshold)
            {
                // Too long to be a dangling link.
                reachedErrorEndPoint = true;
                return null;
            }

            // No stop condition applies: extend the link.
            link.PathNodes.Add(node);
            reachedErrorEndPoint = false;
            return link;
        }
Exemple #15
0
        /// <summary>
        /// Follows a node with one neighbor on either side and makes sure the walk never comes back around to it
        /// in a full circle, which is problematic for making these things.
        /// Note that nodes can go to A->A->C if they refer to themselves but match the reverse complement of themselves.
        /// </summary>
        /// <param name="currentNode">Node to start from; it must classify as NODE_TYPE.LINK_IN_CHAIN.</param>
        /// <returns>
        /// False when the walk returns to <paramref name="currentNode"/> after its left neighbor has been visited;
        /// true when the walk leaves the run of LINK_IN_CHAIN nodes first.
        /// </returns>
        /// <exception cref="Exception">Thrown when <paramref name="currentNode"/> is not a LINK_IN_CHAIN node.</exception>
        private bool VerifyNotCircular(DeBruijnNode currentNode)
        {
            // Nodes stepped onto so far; the start node itself is only added if the walk returns to it.
            List <DeBruijnNode> visitedNodes = new List <DeBruijnNode>();

            if (ClassifyNode(currentNode) != NODE_TYPE.LINK_IN_CHAIN)
            {
                throw new Exception("Node type doesn't match well!");
            }
            else
            {
                //go right, if we wind up where we started, circle.
                var  nextNode   = currentNode.GetRightExtensionNodesWithOrientation().First();
                bool goingRight = true;
                //we now either have the second or third node in path as next
                // Keep walking for as long as the next node is itself a simple link.
                while (ClassifyNode(nextNode.Key) == NODE_TYPE.LINK_IN_CHAIN)
                {
                    // Recorded before the start-node test below, so that test sees the node just reached.
                    visitedNodes.Add(nextNode.Key);
                    //determine if this is a kink or not, which will trigger issue at only first node.
                    if (nextNode.Key == currentNode)
                    {
                        //only one way to get back to the start, either we are in a circle, or the first node loops in to its reverse complement and exits
                        //the other way, a "kink" so to speak, we know we have visited the right node since we started there, if we visited the left, problems
                        bool leftVisited = visitedNodes.Contains(currentNode.GetLeftExtensionNodes().First());
                        if (leftVisited)
                        {
                            return(false);
                        }
                        Debug.Assert(visitedNodes.Contains(currentNode.GetRightExtensionNodes().First()));
                    }

                    // The side to leave the next node from: unchanged when the edge orientation flag is true,
                    // flipped when it is false.
                    goingRight = !(goingRight ^ nextNode.Value);
                    var nextSet = goingRight ? nextNode.Key.GetRightExtensionNodesWithOrientation() : nextNode.Key.GetLeftExtensionNodesWithOrientation();
                    // Anything other than exactly one onward neighbor ends the simple run: not circular.
                    if (nextSet.Count != 1)
                    {
                        return(true);
                    }
                    nextNode = nextSet.First();
                }
                // The walk reached a node that is not a simple link before circling back.
                return(true);
            }
        }
Exemple #16
0
        /// <summary>
        /// Traces the simple path that starts at 'node' in the given direction, then either marks a
        /// low-coverage path (coverage threshold set) or records the contig (no threshold set).
        /// </summary>
        /// <param name="assembledContigs">List that receives the assembled contig; also used as the lock object while adding.</param>
        /// <param name="node">Starting node of contig path.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        private void TraceSimplePath(List<ISequence> assembledContigs, DeBruijnNode node, bool isForwardDirection)
        {
            ISequence startSequence = _graph.GetNodeSequence(node);
            Sequence contig = new Sequence(startSequence.Alphabet, startSequence.ToString());
            contig.IsReadOnly = false;

            List<DeBruijnNode> path = new List<DeBruijnNode>();
            path.Add(node);

            KeyValuePair<DeBruijnNode, DeBruijnEdge> firstLink;
            if (isForwardDirection)
            {
                firstLink = node.RightExtensionNodes.First();
            }
            else
            {
                firstLink = node.LeftExtensionNodes.First();
            }

            TraceSimplePathLinks(path, contig, isForwardDirection, firstLink.Value.IsSameOrientation, firstLink.Key);

            // Duplicate suppression: keep the path only when the sequence of its first node does not
            // sort (ordinally) before the sequence of its last node.
            string firstNodeText = _graph.GetNodeSequence(path[0]).ToString();
            string lastNodeText = _graph.GetNodeSequence(path.Last()).ToString();
            if (string.CompareOrdinal(firstNodeText, lastNodeText) < 0)
            {
                return;
            }

            if (_coverageThreshold != -1)
            {
                // Definition from Velvet Manual: http://helix.nih.gov/Applications/velvet_manual.pdf
                // "k-mer coverage" is how many times a k-mer has been seen among the reads.
                double coverage = path.Average(n => n.KmerCount);
                if (coverage < _coverageThreshold)
                {
                    path.ForEach(n => n.MarkNode());
                }

                // With a threshold set, no contig is recorded.
                return;
            }

            lock (assembledContigs)
            {
                assembledContigs.Add(contig);
            }
        }
Exemple #17
0
        /// <summary>
        /// Builds this object as a circular loop: starting at <paramref name="startNode"/>, walks the chain of
        /// LINK_IN_CHAIN nodes until the walk arrives back at the start node, collecting each node in
        /// ConstituentNodes and one symbol per node in contigSequence, then stores the result in Sequence.
        /// </summary>
        /// <param name="startNode">Node the circle starts and ends at.</param>
        /// <param name="graph">Graph supplying the k-mer length.</param>
        /// <exception cref="Exception">Thrown when a node on the walk is not a LINK_IN_CHAIN node.</exception>
        private void MakeCircle(DeBruijnNode startNode, DeBruijnGraph graph)
        {
            CircularLoop = true;
            byte[] v = startNode.GetOriginalSymbols(graph.KmerLength);
            // Writes the start node's symbols to standard output.
            Console.WriteLine((new Sequence(DnaAlphabet.Instance, v)).ToString());
            ConstituentNodes.Add(startNode);
            startNode.IsVisited = true;
            Dictionary <DeBruijnNode, bool> nextNodes;
            bool goRight = true;

            // The walk always leaves the start node through its first right extension.
            nextNodes = startNode.GetRightExtensionNodesWithOrientation();
            var          nextSet = nextNodes.First();
            DeBruijnNode next    = nextSet.Key;

            // NOTE(review): the loop only ends by reaching startNode again or by the exception below.
            while (next != startNode)
            {
                next.IsVisited = true;
                ConstituentNodes.Add(next);
                bool      sameOrientation = nextSet.Value;
                NODE_TYPE nextType        = ClassifyNode(next);
                //what direction do we get the node following the next one from? (Note path out determined by path in, so don't need to look at next node to get side of the one after).
                // goRight is updated before it is used below, so both uses see the new direction.
                goRight = (!goRight) ^ sameOrientation;
                if (nextType == NODE_TYPE.LINK_IN_CHAIN)
                {
                    //NOTE: four possibilities condense in to 2 possible sides so written with ^ operator
                    nextNodes = goRight ? next.GetRightExtensionNodesWithOrientation() : next.GetLeftExtensionNodesWithOrientation();
                    //now how to determine what base to get? This only depends on relationship of current node to next node
                    //in all cases we either grab the RC of the first base or the last base, and which to grab is determined by incoming node
                    byte nextSymbol = GetNextSymbol(next, graph.KmerLength, !goRight);
                    // contigSequence is declared outside this method — presumably a member holding the symbols so far; confirm its initial contents.
                    contigSequence.Add(nextSymbol);
                }
                else
                {
                    throw new Exception("Non circular path being treated like one");
                }
                // Advance to the single onward neighbor chosen above.
                nextSet = nextNodes.First();
                next    = nextSet.Key;
            }
            // Convert the collected symbols into the string stored in Sequence.
            Sequence = (new Sequence((IAlphabet)NoGapDnaAlphabet.Instance, contigSequence.ToArray())).ConvertToString(0, contigSequence.Count);
        }
Exemple #18
0
        /// <summary>
        /// Gets the symbol a node contributes while chains are being formed: either the last
        /// of the node's original symbols, or the complement of the first one.
        /// This can be made a lot more efficient if it turns into a bottleneck.
        /// </summary>
        /// <param name="node">Node to read the symbol from.</param>
        /// <param name="kmerLength">K-mer length passed to the node when its original symbols are retrieved.</param>
        /// <param name="GetRCofFirstBaseInsteadOfLastBase">
        /// True to return the complement of the first symbol; false to return the last symbol unchanged.
        /// </param>
        /// <returns>The selected symbol.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
        /// <exception cref="Exception">The DNA alphabet cannot supply a complement for the first symbol.</exception>
        public static byte GetNextSymbol(DeBruijnNode node, int kmerLength, bool GetRCofFirstBaseInsteadOfLastBase)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            byte[] symbols = node.GetOriginalSymbols(kmerLength);

            if (!GetRCofFirstBaseInsteadOfLastBase)
            {
                return(symbols.Last());
            }

            byte complement;
            if (!DnaAlphabet.Instance.TryGetComplementSymbol(symbols.First(), out complement))
            {
                // Should never happen.
                throw new Exception("Could not revcomp base during graph construction");
            }

            return(complement);
        }
Exemple #19
0
        /// <summary>
        /// Appends <paramref name="nextNode"/> to the contig path unless it is the node the
        /// path started with, and optionally extends the contig sequence by one symbol.
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Symbols of the contig being assembled.</param>
        /// <param name="nextNode">Next node on the path to be added.</param>
        /// <param name="isForwardDirection">True to append the new symbol, false to prepend it.</param>
        /// <param name="isSameOrientation">Boolean indicating orientation.</param>
        /// <param name="createContigSequences">Boolean indicating whether contig sequences are to be created or not.</param>
        /// <returns>True if the path was extended; false if the node would close a loop on the first path node.</returns>
        private bool CheckAndAddNode(
            List <DeBruijnNode> contigPath,
            List <byte> contigSequence,
            DeBruijnNode nextNode,
            bool isForwardDirection,
            bool isSameOrientation,
            bool createContigSequences)
        {
            // Only the first node of the path is compared. The original author reasoned that,
            // with ambiguous extensions removed, a repeat can only be the first node,
            // but flagged that assumption as not true (TODO).
            bool closesLoopOnStart = contigPath.Count > 0 && contigPath[0] == nextNode;
            if (closesLoopOnStart)
            {
                // Nothing was updated.
                return(false);
            }

            contigPath.Add(nextNode);

            if (!createContigSequences)
            {
                return(true);
            }

            // Extend the contig with the symbol contributed by the new node.
            byte symbol = this._graph.GetNextSymbolFrom(nextNode, isForwardDirection, isSameOrientation);

            // Inserting at Count appends; inserting at 0 prepends.
            int insertAt = isForwardDirection ? contigSequence.Count : 0;
            contigSequence.Insert(insertAt, symbol);

            return(true);
        }
        /// <summary>
        /// Flags every node reachable from <paramref name="startNode"/> as visited, not caring
        /// about orientation. An explicit stack is used instead of recursion so that deep
        /// graphs cannot overflow the call stack.
        /// </summary>
        /// <param name="startNode">Start node.</param>
        private void visitAllConnectedNodes(DeBruijnNode startNode)
        {
            Stack <DeBruijnNode> toProcess = new Stack <DeBruijnNode>(16000);

            // Nodes are flagged when they are pushed, not when they are popped. Flagging on pop
            // lets a node that is a neighbour of several pending nodes be pushed (and later
            // expanded) once per neighbour, which inflates the stack for no benefit.
            startNode.IsVisited = true;
            toProcess.Push(startNode);

            while (toProcess.Count > 0)
            {
                DeBruijnNode current = toProcess.Pop();
                foreach (DeBruijnNode neighbor in current.GetExtensionNodes())
                {
                    if (neighbor.IsVisited)
                    {
                        continue;
                    }

                    neighbor.IsVisited = true;
                    toProcess.Push(neighbor);
                }
            }
        }
Exemple #21
0
        /// <summary>
        /// Adds <paramref name="nextNode"/> to the contig path unless the path already contains it,
        /// and optionally extends the contig sequence by one symbol.
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Symbols of the contig being assembled.</param>
        /// <param name="nextNode">Next node on the path to be added.</param>
        /// <param name="isForwardDirection">True to append the new symbol, false to prepend it.</param>
        /// <param name="isSameOrientation">Boolean indicating orientation.</param>
        /// <param name="createContigSequences">Boolean indicating whether contig sequences are to be created or not.</param>
        /// <returns>True if the path was extended; false if adding the node would create a loop.</returns>
        private bool CheckAndAddNode(
            IList <DeBruijnNode> contigPath,
            List <byte> contigSequence,
            DeBruijnNode nextNode,
            bool isForwardDirection,
            bool isSameOrientation,
            bool createContigSequences)
        {
            // A node that is already on the path means the link loops back; leave everything untouched.
            if (contigPath.Contains(nextNode))
            {
                return(false);
            }

            contigPath.Add(nextNode);

            if (!createContigSequences)
            {
                return(true);
            }

            // Grow the contig on the side matching the direction of travel.
            byte contributed = this._graph.GetNextSymbolFrom(nextNode, isForwardDirection, isSameOrientation);
            if (isForwardDirection)
            {
                contigSequence.Add(contributed);
            }
            else
            {
                contigSequence.Insert(0, contributed);
            }

            return(true);
        }
Exemple #22
0
        /// <summary>
        /// Looks up the tree node whose k-mer data equals that of <paramref name="kmerValue"/>.
        /// </summary>
        /// <param name="kmerValue">The k-mer value to search for.</param>
        /// <returns>The matching node in the tree, or null when no node carries that value.</returns>
        public DeBruijnNode SearchTree(KmerData32 kmerValue)
        {
            DeBruijnNode current = this.root;

            while (current != null)
            {
                ulong nodeKey = current.NodeValue.KmerData;
                ulong wanted  = kmerValue.KmerData;

                if (nodeKey == wanted)
                {
                    return(current);
                }

                // Smaller keys live in the left subtree, larger ones in the right subtree.
                current = wanted < nodeKey ? current.Left : current.Right;
            }

            return(null);
        }
Exemple #23
0
        /// <summary>
        /// Try and extend dangling links following
        /// graph clean-up after erosion.
        /// </summary>
        /// <param name="isForwardDirection">Boolean indicating direction of dangling link</param>
        /// <param name="danglingLink">Dangling Link</param>
        /// <param name="node">Node that is next on the link</param>
        /// <param name="sameOrientation">Orientation of link</param>
        /// <param name="removeLast">Boolean indicating if <paramref name="node"/> has to be
        /// removed from the link before extending</param>
        /// <returns>Number of nodes in the dangling link after extension, or 0 when the link
        /// is empty and the node is not an end-point, or when tracing returned no link</returns>
        private int ExtendDanglingLink(bool isForwardDirection, DeBruijnPath danglingLink, DeBruijnNode node, bool sameOrientation, bool removeLast)
        {
            if (removeLast)
            {
                danglingLink.PathNodes.Remove(node);
            }

            DeBruijnPath extended;
            if (danglingLink.PathNodes.Count > 0)
            {
                // The link already has nodes: keep extending it in the given direction.
                extended = TraceDanglingExtensionLink(isForwardDirection, danglingLink, node, sameOrientation);
            }
            else if (node.RightExtensionNodes.Count == 0)
            {
                // Empty link and the node has no right extensions: start a fresh backward trace.
                extended = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
            }
            else if (node.LeftExtensionNodes.Count == 0)
            {
                // Empty link and the node has no left extensions: start a fresh forward trace.
                extended = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
            }
            else
            {
                // Empty link and the node is not an end-point.
                return(0);
            }

            return(extended == null ? 0 : extended.PathNodes.Count);
        }
Exemple #24
0
        /// <summary>
        /// Starting from potential end of dangling link, trace back along
        /// extension edges in graph to find if it is a valid dangling link.
        /// Parallelization Note: the original author states that only graph structure is read
        /// here and no modifications are made. The one lock visible in this method guards
        /// the shared list of deferred extension tasks.
        /// </summary>
        /// <param name="isForwardDirection">Boolean indicating direction of dangling link.</param>
        /// <param name="link">Dangling Link.</param>
        /// <param name="node">Node that is next on the link.</param>
        /// <param name="sameOrientation">Orientation of link.</param>
        /// <returns>
        /// The link as returned by CheckAndAddDanglingNode, the unmodified link when an ambiguity
        /// stops the trace, or null when the extension has been deferred to a queued task.
        /// </returns>
        private DeBruijnPath TraceDanglingExtensionLink(bool isForwardDirection, DeBruijnPath link, DeBruijnNode node, bool sameOrientation)
        {
            bool reachedEndPoint = false;

            while (!reachedEndPoint)
            {
                // Get extensions going in same and opposite directions.
                Dictionary <DeBruijnNode, bool> sameDirectionExtensions;
                int sameDirectionExtensionsCount;
                int oppDirectionExtensionsCount;
                if (isForwardDirection ^ sameOrientation)
                {
                    sameDirectionExtensionsCount = node.LeftExtensionNodesCount;
                    oppDirectionExtensionsCount  = node.RightExtensionNodesCount;
                    // Self references are filtered out of the dictionary here and in the else branch.
                    // Note that the two counts above are taken before this filtering.
                    // TODO (original comment, truncated in the source): "We should force the k-mer
                    // to be large enough that there is no"
                    sameDirectionExtensions = node.GetLeftExtensionNodesWithOrientation().
                                              Where(x => x.Key != node).
                                              ToDictionary(x => x.Key, y => y.Value);
                }
                else
                {
                    sameDirectionExtensionsCount = node.RightExtensionNodesCount;
                    oppDirectionExtensionsCount  = node.LeftExtensionNodesCount;
                    sameDirectionExtensions      = node.GetRightExtensionNodesWithOrientation().
                                                   Where(x => x.Key != node).
                                                   ToDictionary(x => x.Key, y => y.Value);
                }

                if (sameDirectionExtensionsCount == 0)
                {
                    // Found other end of dangling link
                    // Add this and return.
                    return(this.CheckAndAddDanglingNode(link, node, out reachedEndPoint));
                }

                if (oppDirectionExtensionsCount > 1)
                {
                    // Have reached a point of ambiguity. Return list without updating it.
                    if (this.erodeThreshold != -1 && !node.IsMarkedForDelete)
                    {
                        lock (this.danglingLinkExtensionTasks)
                        {
                            // This task essentially just returns back to this method after other ones are removed.
                            // NOTE(review): the one-argument lambda appears to bind the (function, state)
                            // constructor, so TaskCreationOptions.None would be passed as the state object
                            // rather than as creation options - confirm.
                            this.danglingLinkExtensionTasks.Add(new Task <int>((o) => this.ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, false), TaskCreationOptions.None));
                        }

                        // Extension deferred; no link is reported from this call.
                        return(null);
                    }

                    return(link);
                }

                if (sameDirectionExtensionsCount > 1)
                {
                    // Have reached a point of ambiguity. Return list after updating it.
                    link = this.CheckAndAddDanglingNode(link, node, out reachedEndPoint);
                    if (this.erodeThreshold != -1 && reachedEndPoint != true && !node.IsMarkedForDelete)
                    {
                        lock (this.danglingLinkExtensionTasks)
                        {
                            // Deferred extension; removeLast is true because this node was just added to the link.
                            this.danglingLinkExtensionTasks.Add(new Task <int>((o) => this.ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, true), TaskCreationOptions.None));
                        }

                        return(null);
                    }

                    return(link);
                }

                // Remaining case: exactly one same-direction extension (by unfiltered count)
                // and at most one opposite-direction extension.
                // Continue trace back. Add this node to the link and move on to the next node.
                link = this.CheckAndAddDanglingNode(link, node, out reachedEndPoint);
                if (reachedEndPoint)
                {
                    // Loop is found or threshold length has been exceeded.
                    return(link);
                }

                // Still tracing, so step to the single extension and keep going.
                // NOTE(review): if the node's only same-direction extension is the node itself, the
                // filtered dictionary would be empty and First() would throw - confirm this cannot occur.
                var item = sameDirectionExtensions.First();
                node            = item.Key;
                // Orientation is kept when the edge has the same orientation, flipped otherwise.
                sameOrientation = !(sameOrientation ^ item.Value);
            }

            return(null); // code will never reach here. Valid returns happen within the while loop.
        }
Exemple #25
0
        /// <summary>
        /// Starting from a potential end of a dangling link, walks along extension edges
        /// one node at a time to decide whether the nodes form a valid dangling link.
        /// The only lock taken here guards the shared list of deferred extension tasks.
        /// </summary>
        /// <param name="isForwardDirection">Boolean indicating direction of dangling link</param>
        /// <param name="link">Dangling Link</param>
        /// <param name="node">Node that is next on the link</param>
        /// <param name="sameOrientation">Orientation of link</param>
        /// <returns>
        /// The link as returned by CheckAndAddDanglingNode, the unmodified link when an ambiguity
        /// stops the trace, or null when the extension has been deferred to a queued task
        /// </returns>
        private DeBruijnPath TraceDanglingExtensionLink(bool isForwardDirection, DeBruijnPath link, DeBruijnNode node, bool sameOrientation)
        {
            bool reachedEndPoint = false;

            while (!reachedEndPoint)
            {
                // Decide which side of the node continues the walk and which side leads back.
                bool walkLeft = isForwardDirection ^ sameOrientation;
                Dictionary <DeBruijnNode, DeBruijnEdge> onward   = walkLeft ? node.LeftExtensionNodes : node.RightExtensionNodes;
                Dictionary <DeBruijnNode, DeBruijnEdge> backward = walkLeft ? node.RightExtensionNodes : node.LeftExtensionNodes;

                if (onward.Count == 0)
                {
                    // Other end of the dangling link: add this node and finish.
                    return(CheckAndAddDanglingNode(link, node, out reachedEndPoint));
                }

                if (backward.Count > 1)
                {
                    // Ambiguity behind us: the link is not updated with this node.
                    if (_erodeThreshold == -1 || node.IsMarked())
                    {
                        return(link);
                    }

                    lock (_danglingLinkExtensionTasks)
                    {
                        _danglingLinkExtensionTasks.Add(new Task <int>((o) =>
                                                                       ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, false),
                                                                       TaskCreationOptions.None));
                    }

                    return(null);
                }

                if (onward.Count > 1)
                {
                    // Ambiguity ahead of us: the link is updated with this node first.
                    link = CheckAndAddDanglingNode(link, node, out reachedEndPoint);
                    if (_erodeThreshold == -1 || reachedEndPoint || node.IsMarked())
                    {
                        return(link);
                    }

                    lock (_danglingLinkExtensionTasks)
                    {
                        _danglingLinkExtensionTasks.Add(new Task <int>((o) =>
                                                                       ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, true),
                                                                       TaskCreationOptions.None));
                    }

                    return(null);
                }

                // Exactly one way onward and at most one way back: add the node and keep walking.
                link = CheckAndAddDanglingNode(link, node, out reachedEndPoint);
                if (reachedEndPoint)
                {
                    // Loop is found or threshold length has been exceeded.
                    return(link);
                }

                KeyValuePair <DeBruijnNode, DeBruijnEdge> step = onward.First();
                node            = step.Key;
                sameOrientation = !(sameOrientation ^ step.Value.IsSameOrientation);
            }

            return(null); // code will never reach here. Valid returns happen within the while loop.
        }
        /// <summary>
        /// Lazily enumerates the assemblies obtained by extending <paramref name="currentPath"/>
        /// through <paramref name="nextNeighbor"/>. The path is followed while exactly one
        /// continuation exists; where several exist, each one is explored recursively on a clone
        /// of the path; where none exists, the path itself is yielded.
        /// </summary>
        /// <param name="currentPath">Path being extended; nodes and symbols are added to it in place.</param>
        /// <param name="nextNeighbor">Node to add to the path first.</param>
        /// <param name="goingRight">Direction flag for the step into <paramref name="nextNeighbor"/>.</param>
        /// <param name="sameOrientation">Orientation of the edge leading to <paramref name="nextNeighbor"/>.</param>
        /// <returns>Sequence of extended paths.</returns>
        private IEnumerable <PossibleAssembly> ExtendChain(PossibleAssembly currentPath, DeBruijnNode nextNeighbor, bool goingRight, bool sameOrientation)
        {
            byte nextSymbol = MetaNode.GetNextSymbol(nextNeighbor, KmerLength, !goingRight);

            currentPath.Add(nextNeighbor, nextSymbol);
            nextNeighbor.IsVisited = true;
            // Side through which the new node is left: fixed by the entry direction and the edge orientation.
            bool nextRight = !goingRight ^ sameOrientation;
            List <KeyValuePair <DeBruijnNode, bool> > nextNodes = nextRight ? nextNeighbor.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() :
                                                                  nextNeighbor.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
            DeBruijnNode next;

            //DeBruijnNode last = currentPath.constituentNodes[currentPath.constituentNodes.Count-1];
            //DeBruijnNode first=currentPath.constituentNodes[0];
            // Follow the chain for as long as there is exactly one way to continue.
            while (nextNodes.Count == 1)
            {
                var nextSet = nextNodes.First();
                next            = nextSet.Key;
                sameOrientation = nextSet.Value;
                nextRight       = (!nextRight) ^ sameOrientation;
                nextSymbol      = MetaNode.GetNextSymbol(next, KmerLength, !nextRight);
                // Now check if we are in a circle or a loop at the end. These are very annoying situations.
                // Basic criterion: can't leave the same node the same way twice.
                if (next.IsVisited && currentPath.constituentNodes.Contains(next))
                {
                    // Okay, if we are equal to the first node or the last node, we can't leave or return the same way we came, otherwise we are done.
                    var excludedNextNodes = currentPath.GetPreviousWaysNodeWasLeft(next);
                    // How many neighbors do we have in this group?
                    var temp = nextRight ? next.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() : next.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
                    temp = temp.Where(x => !excludedNextNodes.Contains(x.Key)).ToList();
                    // Only one way to go: stay in the loop and continue from that single option.
                    if (temp.Count == 1)
                    {
                        nextNodes = temp;
                        //currentPath.contigSequence.Add(nextSymbol);
                        currentPath.Add(next, nextSymbol);
                        next.IsVisited = true; //flag not actually used though
                    }
                    else if (temp.Count == 0)  // Done: no unused way out of this node.
                    {
                        // Ending on the node the path started with is recorded as a circular loop.
                        if (currentPath.constituentNodes[0] == next)
                        {
                            currentPath.CircularLoop = true;
                        }
                        yield return(currentPath);

                        //nextNodes.Clear();//we are done
                        yield break;
                    }
                    else // Extend path using all feasible options (each on its own clone), then stop.
                    {
                        foreach (var neighbor in temp)
                        {
                            foreach (var v in ExtendChain(currentPath.Clone(), neighbor.Key, nextRight, neighbor.Value))
                            {
                                yield return(v);
                            }
                        }
                        //nextNodes.Clear();//done
                        yield break;
                    }
                }
                else
                {
                    // Node is not on the current path yet: add it and fetch its continuations.
                    //currentPath.contigSequence.Add(nextSymbol);
                    currentPath.Add(next, nextSymbol);
                    next.IsVisited = true;//flag not actually used though
                    nextNodes      = nextRight ? next.GetRightExtensionNodesWithOrientationMarkingEdgeAsVisited() : next.GetLeftExtensionNodesWithOrientationMarkingEdgeAsVisited();
                }
            }
            // If we have more than one node remaining, have to kick off one recursive extension per option.
            if (nextNodes.Count > 1)
            {
                foreach (var neighbor in nextNodes)
                {
                    foreach (var v in ExtendChain(currentPath.Clone(), neighbor.Key, nextRight, neighbor.Value))
                    {
                        yield return(v);
                    }
                }
            }
            // Dead end: the path cannot be extended any further, so report it as is.
            if (nextNodes.Count == 0)
            {
                yield return(currentPath);
            }
        }
Exemple #27
0
        /// <summary>
        /// Erode ends of graph that have coverage less than given erodeThreshold.
        /// As optimization, dangling links are also traced and the lengths of the links found
        /// are recorded; the links themselves are not removed at this step. This gives an idea
        /// of the different lengths at which to run the dangling links purger step.
        /// Rounds are repeated on the collection handed back by RemoveErodedNodes until it is
        /// null or empty, after which ExtendDanglingLinks is called.
        /// Concurrency: nodes are scanned in parallel over non-overlapping index ranges.
        /// Isolated low-coverage nodes are marked from inside the parallel section, so this
        /// method does more than read the graph.
        /// </summary>
        /// <param name="graph">Input graph</param>
        /// <param name="erodeThreshold">Threshold for erosion; -1 disables marking of nodes</param>
        /// <returns>Lengths of dangling links detected</returns>
        /// <exception cref="ArgumentNullException"><paramref name="graph"/> is null</exception>
        public IEnumerable <int> ErodeGraphEnds(DeBruijnGraph graph, int erodeThreshold = -1)
        {
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            _erodeThreshold             = erodeThreshold;
            _danglingLinkLengths        = new SortedSet <int>();
            _danglingLinkExtensionTasks = new List <Task <int> >();
            ICollection <DeBruijnNode> graphNodes = graph.Nodes;

            do
            {
                // Make graphNodes into an Array so that Range Partitioning can be used.
                DeBruijnNode[] graphNodesList = graphNodes.ToArray();

                if (graphNodesList.Length != 0)
                {
                    // Size the ranges from the nodes examined in THIS round. Using the size of the
                    // whole graph would make every later round (which only looks at the new
                    // end-points) collapse into a single range and run sequentially.
                    // double keeps the division exact for any int count.
                    int rangeSize = (int)Math.Ceiling((double)graphNodesList.Length / Environment.ProcessorCount);

                    _danglingLinkLengths.UnionWith(
                        Partitioner.Create(0, graphNodesList.Length, rangeSize).AsParallel().SelectMany(chunk =>
                    {
                        // Lengths found inside this index range; merged into the shared set by UnionWith.
                        SortedSet <int> linkLengths = new SortedSet <int>();
                        for (int i = chunk.Item1; i < chunk.Item2; i++)
                        {
                            DeBruijnNode node = graphNodesList[i];
                            if (node.ExtensionsCount == 0)
                            {
                                if (_erodeThreshold != -1 && node.KmerCount < _erodeThreshold)
                                {
                                    // Mark node for erosion
                                    node.MarkNode();
                                }
                                else
                                {
                                    // Single node island
                                    linkLengths.Add(1);
                                }
                            }
                            else if (node.RightExtensionNodes.Count == 0)
                            {
                                // End of possible dangling link
                                // Traceback to see if it is part of a dangling link
                                DeBruijnPath link = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
                                if (link != null && link.PathNodes.Count > 0)
                                {
                                    linkLengths.Add(link.PathNodes.Count);
                                }
                            }
                            else if (node.LeftExtensionNodes.Count == 0)
                            {
                                // End of possible dangling link
                                // Traceback to see if it is part of a dangling link
                                DeBruijnPath link = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
                                if (link != null && link.PathNodes.Count > 0)
                                {
                                    linkLengths.Add(link.PathNodes.Count);
                                }
                            }
                        }
                        return(linkLengths);
                    }));

                    // Remove eroded nodes. In the out parameter, get the list of new
                    // end-points that was created by removing eroded nodes.
                    RemoveErodedNodes(graph, out graphNodes);
                }
            } while (graphNodes != null && graphNodes.Count > 0);

            // Erosion is finished: reset the threshold before the deferred extensions run.
            _erodeThreshold = -1;
            ExtendDanglingLinks();
            return(_danglingLinkLengths);
        }
Exemple #28
0
        /// <summary>
        /// Finds the node whose k-mer data equals that of <paramref name="value"/> and, when it
        /// is missing and <paramref name="makeNewIfNotFound"/> is true, adds a new node for it.
        /// The k-mer count of the node that is returned is incremented by one unless it has
        /// already reached UInt32.MaxValue.
        /// Useful when two values that are equal by comparison are not equal by reference.
        /// </summary>
        /// <param name="value">Value to add or look up.</param>
        /// <param name="makeNewIfNotFound">
        /// True to insert a node when the value is not in the tree; false to only look it up.
        /// </param>
        /// <returns>
        /// The node added or found, or null when the value is absent and
        /// <paramref name="makeNewIfNotFound"/> is false.
        /// </returns>
        public DeBruijnNode AddOrReturnCurrent(KmerData32 value, bool makeNewIfNotFound = true)
        {
            DeBruijnNode toReturn = null;

            if (this.root == null)
            {
                // Empty tree: honour makeNewIfNotFound here as well, exactly like the
                // left/right insertion points below, so a pure lookup never creates a root.
                if (makeNewIfNotFound)
                {
                    toReturn  = makeNewNode(value);
                    this.root = toReturn;
                }
            }
            else
            {
                ulong        newKey = value.KmerData;
                DeBruijnNode node   = this.root;
                while (true)
                {
                    ulong currentKey = node.NodeValue.KmerData;
                    if (currentKey == newKey)
                    {
                        // key already exists.
                        toReturn = node;
                        break;
                    }

                    if (newKey < currentKey)
                    {
                        // Smaller keys belong in the left subtree.
                        if (node.Left != null)
                        {
                            node = node.Left;
                            continue;
                        }

                        if (makeNewIfNotFound)
                        {
                            toReturn  = makeNewNode(value);
                            node.Left = toReturn;
                        }
                        break;
                    }

                    // Larger keys belong in the right subtree.
                    if (node.Right != null)
                    {
                        node = node.Right;
                        continue;
                    }

                    if (makeNewIfNotFound)
                    {
                        toReturn   = makeNewNode(value);
                        node.Right = toReturn;
                    }
                    break;
                }
            }

            // Saturating increment: the count never wraps around.
            if (toReturn != null && toReturn.KmerCount < UInt32.MaxValue)
            {
                toReturn.KmerCount++;
            }
            return(toReturn);
        }
Exemple #29
0
        /// <summary>
        /// Builds a meta node starting at <paramref name="startNode"/>. The start node is
        /// classified and marked visited; depending on its classification it either forms a
        /// meta node on its own or the chain it belongs to is followed to collect the
        /// constituent nodes and the combined sequence.
        /// </summary>
        /// <param name="startNode">Graph node the meta node is grown from; must not have been visited yet.</param>
        /// <param name="graph">Graph supplying the k-mer length and the node sequences.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="startNode"/> or <paramref name="graph"/> is null.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="startNode"/> has already been visited.
        /// </exception>
        public MetaNode(DeBruijnNode startNode, DeBruijnGraph graph)
        {
            // Validate before touching the global node counter so that a bad call
            // does not consume a node number.
            if (startNode == null)
            {
                throw new ArgumentNullException("startNode");
            }
            if (graph == null)
            {
                throw new ArgumentNullException("graph");
            }

            this.NodeNumber = GraphGenerator.NodeCount++;
            KmerLength      = graph.KmerLength;
            if (startNode.IsVisited)
            {
                throw new InvalidOperationException("If a node has been visited it should not form a metanode, suggests an infinite recursion problem");
            }
            NODE_TYPE type = ClassifyNode(startNode);

            startNode.IsVisited = true;
            // Any of these node types becomes a meta node consisting of just itself.
            if (type == NODE_TYPE.NEXUS || type == NODE_TYPE.ISLAND || type == NODE_TYPE.END_LOOPS_ON_ITSELF)
            {
                ConstituentNodes.Add(startNode);
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                Sequence       = (new Sequence((IAlphabet)NoGapDnaAlphabet.Instance, contigSequence.ToArray())).ConvertToString(0, contigSequence.Count);
            }
            else if (type == NODE_TYPE.LINK_IN_CHAIN)
            {
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                if (!VerifyNotCircular(startNode))
                {
                    MakeCircle(startNode, graph);
                }
                else
                {
                    // Go right first.
                    // NOTE(review): the sequence was already seeded above; this re-seed looks
                    // redundant unless VerifyNotCircular modifies contigSequence - confirm.
                    contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                    ExtendChain(startNode, true, graph);
                    // Save the right-hand result, then reset the shared state for the left pass.
                    var tmpRightSeq = contigSequence.ToArray();
                    // Skip the first node: the left pass adds the start node again.
                    var tmpRightNodes = ConstituentNodes.Skip(1).ToArray();
                    ConstituentNodes.Clear();
                    contigSequence.Clear();
                    // Now go left.
                    ExtendChain(startNode, false, graph);
                    // Combine: left nodes reversed, followed by the right nodes.
                    ConstituentNodes.Reverse();
                    ConstituentNodes.AddRange(tmpRightNodes);
                    var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                    tmpSequence = new Sequence(tmpSequence.GetReverseComplementedSequence());
                    string LeftSequence = "";
                    if (tmpSequence.Count > 0)
                    {
                        LeftSequence = tmpSequence.ConvertToString(0, tmpSequence.Count);
                    }
                    tmpSequence    = new Sequence(DnaAlphabet.Instance, tmpRightSeq);
                    Sequence       = LeftSequence + tmpSequence.ConvertToString(0, tmpSequence.Count);
                    contigSequence = new Sequence(DnaAlphabet.Instance, Sequence).ToList();
                }
            }
            else if (type == NODE_TYPE.GO_LEFT)
            {
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode).GetReverseComplementedSequence());
                ExtendChain(startNode, false, graph);
                var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                // Somewhat confusing: the sequence was built as its reverse complement, so it is
                // reverse complemented again to get the correct orientation for the neighbors.
                tmpSequence    = new Sequence(tmpSequence.GetReverseComplementedSequence());
                contigSequence = tmpSequence.ToList();
                Sequence       = tmpSequence.ConvertToString(0, tmpSequence.Count);
                // Flip the node list so nodes and sequence are in the same order.
                ConstituentNodes.Reverse();
            }
            else if (type == NODE_TYPE.GO_RIGHT)
            {
                contigSequence = new List <byte>(graph.GetNodeSequence(startNode));
                ExtendChain(startNode, true, graph);
                var tmpSequence = new Sequence(DnaAlphabet.Instance, contigSequence.ToArray());
                Sequence = tmpSequence.ConvertToString(0, tmpSequence.Count);
            }

            Cement();
        }
Exemple #30
0
        /// <summary>
        /// Follows a chain of singly linked nodes starting at <paramref name="currentNode"/>
        /// until a bifurcation is reached or no additional nodes appear.
        /// The start node and every node accepted into the chain are marked visited and
        /// appended to <c>ConstituentNodes</c>; for every accepted node after the start node
        /// one symbol is appended to <c>contigSequence</c>.
        /// </summary>
        /// <param name="currentNode">Node the walk starts from; it is always added to the chain.</param>
        /// <param name="goRight">
        /// True to leave the start node through its right extensions, false to use the left
        /// extensions. The direction is flipped internally whenever a link reports that the
        /// next node has the opposite orientation.
        /// </param>
        /// <param name="graph">Graph the nodes belong to; only its k-mer length is read here.</param>
        /// <returns>
        /// The extension set at which the walk stopped: the extensions of the last accepted node
        /// when it has zero or several of them, or the single-entry set naming the node that was
        /// rejected (or unwound) when the walk was cut short.
        /// </returns>
        private Dictionary <DeBruijnNode, bool> ExtendChain(DeBruijnNode currentNode, bool goRight, DeBruijnGraph graph)
        {
            ConstituentNodes.Add(currentNode);
            currentNode.IsVisited = true;
            Dictionary <DeBruijnNode, bool> nextNodes;

            if (goRight)
            {
                nextNodes = currentNode.GetRightExtensionNodesWithOrientation();
            }
            else
            {
                nextNodes = currentNode.GetLeftExtensionNodesWithOrientation();
            }
            DeBruijnNode next;
            DeBruijnNode last = currentNode;

            while (nextNodes.Count == 1)
            {
                var nextSet = nextNodes.First();
                next = nextSet.Key;
                bool sameOrientation = nextSet.Value;
                // Keep the direction when the orientation matches, flip it otherwise.
                goRight = (!goRight) ^ sameOrientation;
                int oppositeDirectionExtensions = goRight ? next.LeftExtensionNodesCount : next.RightExtensionNodesCount;
                int sameDirectionExtensions     = goRight ? next.RightExtensionNodesCount : next.LeftExtensionNodesCount;
                // Should always be >= 1, because the node we came from links into 'next'.
                Debug.Assert(oppositeDirectionExtensions != 0);
                if (oppositeDirectionExtensions > 1)
                {
                    // Nexus, or a new meta node has to start here; 'next' is not marked visited.
                    break;
                }
                else
                {
                    // We have to check whether the path loops back on itself; for example TTTTTTTTTTTT
                    // could keep adding T's to infinity, always going back to the same node.
                    // However, a node can also refer to itself without looping, e.g. by turning around, like
                    // TTTTTTTCAATTGAAAAAA which matches the reverse complement of itself and so leaves on the
                    // other side (the original author flagged this reasoning as possibly incorrect).
                    // It is impossible to tell the two apart without looking two steps ahead, and because
                    // nodes are added one at a time, the last addition has to be unwound afterwards.
                    if (next.IsVisited)
                    {
                        // This is an unusual step, as most of the time the opposite-direction extensions will be >1.
                        // It can only happen if the only incoming node to this (k-1)-mer palindrome has no other
                        // links, which will be rare.
                        if (next == last)
                        {
                            // The node is about to refer to itself again: check whether it would do so a third time.
                            var temp = goRight ? next.GetRightExtensionNodesWithOrientation() : next.GetLeftExtensionNodesWithOrientation();
                            if (temp.Count == 1 && temp.First().Key == last)
                            {
                                // Three times in a row: we are not leaving in a different direction, so unwind
                                // the last addition; this node needs to become a self-referencing meta node.
                                next.IsVisited = false;
                                Debug.Assert(ConstituentNodes.Last() == next);
                                ConstituentNodes.RemoveAt(ConstituentNodes.Count - 1);
                                // Drop the symbol appended together with that node. It is always the LAST
                                // element of contigSequence; the node count must not be used as the index,
                                // because contigSequence may have been seeded with the start k-mer and then
                                // holds more symbols than there are nodes.
                                contigSequence.RemoveAt(contigSequence.Count - 1);
                                Debug.Assert(ConstituentNodes.Last() != next);

                                // Exit, we are as far as we can go.
                                break;
                            }
                            // Criterion is that the sequence can't be there more than once.
                        }
                        // At most a k-mer can be used to represent the forward and reverse sequence that it has.
                        Debug.Assert(this.ConstituentNodes.Count(x => x == next) < 3);
                    }
                    byte nextSymbol = GetNextSymbol(next, graph.KmerLength, !goRight);
                    contigSequence.Add(nextSymbol);

                    next.IsVisited = true;
                    ConstituentNodes.Add(next);
                    nextNodes = goRight ? next.GetRightExtensionNodesWithOrientation() : next.GetLeftExtensionNodesWithOrientation();
                    last      = next;
                }
            }

            return(nextNodes);
        }