Esempio n. 1
0
        /// <summary>
        /// Flags the right extension slot that holds the given node as invalid.
        /// Slots are checked in order 0 to 3 and only the first match is flagged.
        /// </summary>
        /// <param name="node">Debruijn node expected to match one of the right extensions of the current node.</param>
        /// <returns>True if a right extension slot matched and was flagged; otherwise false.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public bool MarkRightExtensionAsInvalid(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Every match returns immediately, so the slots are tested as independent guards.
            if (RightExtension0 == node)
            {
                InvalidRightExtension0 = true;
                return true;
            }

            if (RightExtension1 == node)
            {
                InvalidRightExtension1 = true;
                return true;
            }

            if (RightExtension2 == node)
            {
                InvalidRightExtension2 = true;
                return true;
            }

            if (RightExtension3 == node)
            {
                InvalidRightExtension3 = true;
                return true;
            }

            // The node is not one of this node's right extensions.
            return false;
        }
Esempio n. 2
0
 /// <summary>
 /// Guard used by the public members: rejects a null input node.
 /// </summary>
 /// <param name="node">Input node</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
 private static void ValidateNode(DeBruijnNode node)
 {
     bool isMissing = node == null;
     if (isMissing)
     {
         throw new ArgumentNullException("node");
     }
 }
Esempio n. 3
0
        /// <summary>
        /// Looks up the tree node whose value compares equal to the given kmer value.
        /// </summary>
        /// <param name="kmerValue">The kmer value to search for.</param>
        /// <returns>The matching node in the tree, or null if the value is null or no node matches.</returns>
        public DeBruijnNode SearchTree(IKmerData kmerValue)
        {
            // Not expected to happen; a null value is treated as "not found".
            if (kmerValue == null)
            {
                return null;
            }

            DeBruijnNode current = this.root;
            while (current != null)
            {
                int comparison = kmerValue.CompareTo(current.NodeValue);
                if (comparison == 0)
                {
                    // Found the node holding the requested value.
                    return current;
                }

                // Smaller values live in the left subtree, larger ones in the right.
                current = comparison < 0 ? current.Left : current.Right;
            }

            // Walked off the tree without a match.
            return null;
        }
Esempio n. 4
0
        /// <summary>
        /// Gets the sequence for the kmer associated with the input node.
        /// The node's sequence index selects a base sequence and the node's
        /// kmer position and length select the range within it.
        /// When the kmer spans the whole base sequence, the base sequence
        /// itself is returned instead of a range.
        /// </summary>
        /// <param name="node">Graph Node</param>
        /// <returns>Sequence associated with input node</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the node's sequence index or kmer position falls outside the stored base sequences.
        /// </exception>
        public ISequence GetNodeSequence(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Which base sequence does the node point at?
            int sourceIndex = node.SequenceIndex;
            bool indexInRange = sourceIndex >= 0 && sourceIndex < _baseSequence.Count;
            if (!indexInRange)
            {
                throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerIndexOutOfRange);
            }

            // Where inside that base sequence does the kmer start, and does it fit?
            ISequence source = _baseSequence[sourceIndex];
            int start = node.KmerPosition;
            if (start < 0 || start + node.KmerLength > source.Count)
            {
                throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerPositionOutOfRange);
            }

            // The kmer covers the entire base sequence: hand back the sequence as is.
            bool coversWholeSequence = start == 0 && source.Count == node.KmerLength;
            if (coversWholeSequence)
            {
                return source;
            }

            return source.Range(start, node.KmerLength);
        }
        /// <summary>
        /// Builds a DNA sequence from the original symbols stored in the specified node,
        /// decoded using this graph's kmer length.
        /// </summary>
        /// <param name="node">DeBruijn node.</param>
        /// <returns>A new sequence instance over the DNA alphabet.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public ISequence GetNodeSequence(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            byte[] symbols = node.GetOriginalSymbols(this.KmerLength);
            return new Sequence(Alphabets.DNA, symbols);
        }
Esempio n. 6
0
        /// <summary>
        /// Adds the links between the nodes of the graph.
        /// For every node, each of the four possible one-base right and left
        /// neighbours is looked up in the kmer dictionary and, when present,
        /// registered on the node as an extension.
        /// </summary>
        /// <param name="kmerManager">Dictionary used to look up the node for a candidate neighbour kmer.</param>
        private void GenerateLinks(KmerDictionary kmerManager)
        {
            // Each nucleotide takes two bits; this is the bit offset of the left-most nucleotide in the encoded kmer.
            int topBaseShift = 2 * (KmerLength - 1);

            // Mask that clears the two bits of the left-most nucleotide.
            ulong dropTopBaseMask = ~(3UL << topBaseShift);

            Parallel.ForEach(_nodes, node =>
            {
                // Scratch kmer, reused for all eight lookups of this node.
                KmerData32 probe = new KmerData32();

                // Right extensions: drop the left-most base, then shift up by two bits
                // to open a slot for the new right-most base.
                ulong rightStem = (node.NodeValue.KmerData & dropTopBaseMask) << 2;
                for (ulong baseCode = 0; baseCode < 4; baseCode++)
                {
                    // Equivalent to "ACGTA" + "N", where N is the 0-3 encoding of the appended base.
                    ulong candidate = rightStem | baseCode;

                    // SetKmerData reports whether the reverse complement of the candidate
                    // was stored instead of the candidate itself.
                    bool matchIsRC = probe.SetKmerData(candidate, KmerLength);
                    DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNode(true, matchIsRC, neighbour);
                    }
                }

                // Left extensions: chop off the right-most base, then place the new base
                // in the left-most two-bit slot.
                ulong leftStem = node.NodeValue.KmerData >> 2;
                for (ulong baseCode = 0; baseCode < 4; baseCode++)
                {
                    // Equivalent to "N" + "ACGAT".
                    ulong candidate = (baseCode << topBaseShift) | leftStem;
                    bool matchIsRC = probe.SetKmerData(candidate, KmerLength);
                    DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNode(false, matchIsRC, neighbour);
                    }
                }
            });

            LinkGenerationCompleted = true;
        }
Esempio n. 7
0
 /// <summary>
 /// Marks the extension edge that leads to the given node as invalid.
 /// The right extensions are checked first; the left extensions are only
 /// consulted when the node is not a right extension.
 /// Not Thread-safe. Use lock at caller if required.
 /// </summary>
 /// <param name="node">Node for which extension is to be made invalid</param>
 public void MarkExtensionInvalid(DeBruijnNode node)
 {
     ValidateNode(node);

     bool isRightExtension = _rightEndExtensionNodes.ContainsKey(node);
     if (isRightExtension)
     {
         _rightEndExtensionNodes[node].IsValid = false;
         return;
     }

     bool isLeftExtension = _leftEndExtensionNodes.ContainsKey(node);
     if (isLeftExtension)
     {
         _leftEndExtensionNodes[node].IsValid = false;
     }
 }
Esempio n. 8
0
        /// <summary>
        /// Marks the extension edge that leads to the given node as invalid.
        /// The right extensions are tried first; the left extensions are only
        /// tried when no right extension matched.
        /// Not Thread-safe. Use lock at caller if required.
        /// </summary>
        /// <param name="node">Node for which extension is to be made invalid.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public void MarkExtensionInvalid(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            bool handledOnRight = this.MarkRightExtensionAsInvalid(node);
            if (handledOnRight)
            {
                return;
            }

            this.MarkLeftExtensionAsInvalid(node);
        }
Esempio n. 9
0
        /// <summary>
        /// Gets the last symbol of the node when isForwardDirection is true, or the first symbol when it is false.
        /// If the isSameOrientation flag is false then the symbol is taken from the reverse complement of the kmer data.
        /// </summary>
        /// <param name="node">DeBruijn node.</param>
        /// <param name="isForwardDirection">True to take the last symbol, false to take the first symbol.</param>
        /// <param name="isSameOrientation">True to read the original symbols, false to read their reverse complement.</param>
        /// <returns>Byte representing the symbol.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public byte GetNextSymbolFrom(DeBruijnNode node, bool isForwardDirection, bool isSameOrientation)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Pick the strand to read from.
            byte[] symbols;
            if (isSameOrientation)
            {
                symbols = node.GetOriginalSymbols(KmerLength);
            }
            else
            {
                symbols = node.GetReverseComplementOfOriginalSymbols(KmerLength);
            }

            // Pick the end of the kmer to read.
            if (isForwardDirection)
            {
                return symbols.Last();
            }

            return symbols.First();
        }
Esempio n. 10
0
        /// <summary>
        /// Add node with given orientation to right extension edges.
        /// If an edge to the node already exists, its orientation flag is XOR-ed
        /// with the given orientation; otherwise a new edge is created.
        /// Not thread-safe. Use lock at caller if required.
        /// </summary>
        /// <param name="node">Node to add right-extension to</param>
        /// <param name="isSameOrientation">Orientation of connecting edge</param>
        public void AddRightEndExtension(DeBruijnNode node, bool isSameOrientation)
        {
            ValidateNode(node);

            DeBruijnEdge edge;
            if (_rightEndExtensionNodes.TryGetValue(node, out edge))
            {
                // Update the edge TryGetValue already returned instead of
                // looking the same key up a second time through the indexer.
                edge.IsSameOrientation ^= isSameOrientation;
            }
            else
            {
                _rightEndExtensionNodes[node] = new DeBruijnEdge(isSameOrientation);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Tries to add specified value to the BinaryTree.
        /// If a node with the same kmer data is already present in the tree then that node is returned.
        /// Useful when two values that are equal by comparison are not equal by reference.
        /// </summary>
        /// <param name="value">Value to add.</param>
        /// <returns>The node newly created for the value, or the node already in the tree with the same kmer data.</returns>
        public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
        {
            // Empty tree: the new node becomes the root.
            if (_root == null)
            {
                DeBruijnNode firstNode = MakeNewNode(value);
                _root = firstNode;
                return firstNode;
            }

            ulong newKey = value.KmerData;
            DeBruijnNode current = _root;
            while (true)
            {
                ulong currentKey = current.NodeValue.KmerData;
                if (currentKey == newKey)
                {
                    // Key already exists.
                    return current;
                }

                // Smaller keys descend to the left, larger keys to the right.
                bool goLeft = newKey < currentKey;
                DeBruijnNode child = goLeft ? current.Left : current.Right;
                if (child != null)
                {
                    current = child;
                    continue;
                }

                // Reached an empty slot: attach a new node there.
                DeBruijnNode created = MakeNewNode(value);
                if (goLeft)
                {
                    current.Left = created;
                }
                else
                {
                    current.Right = created;
                }

                return created;
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Initializes a new instance of the PathWithOrientation class
        /// whose path starts with the two given nodes, in that order.
        /// </summary>
        /// <param name="node1">First node to add.</param>
        /// <param name="node2">Second node to add.</param>
        /// <param name="orientation">Path orientation.</param>
        /// <exception cref="ArgumentNullException">Thrown when either node is null.</exception>
        public PathWithOrientation(DeBruijnNode node1, DeBruijnNode node2, bool orientation)
        {
            if (node1 == null)
            {
                throw new ArgumentNullException("node1");
            }

            if (node2 == null)
            {
                throw new ArgumentNullException("node2");
            }

            // Build the initial two-node path before publishing it on the instance.
            List<DeBruijnNode> initialPath = new List<DeBruijnNode>();
            initialPath.Add(node1);
            initialPath.Add(node2);

            this.nodes = initialPath;
            this.IsSameOrientation = orientation;
        }
Esempio n. 13
0
 /// <summary>
 /// Tries to add specified value to the BinaryTree.
 /// If a node with the same kmer data is already present in the tree then that node is returned.
 /// Useful when two values that are equal by comparison are not equal by reference.
 /// </summary>
 /// <param name="value">Value to add.</param>
 /// <returns>The node newly created for the value, or the node already in the tree with the same kmer data.</returns>
 public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
 {
     DeBruijnNode parent = _root;
     if (parent == null)
     {
         // Empty tree: the new node becomes the root.
         DeBruijnNode first = MakeNewNode(value);
         _root = first;
         return first;
     }

     ulong newKey = value.KmerData;
     for (;;)
     {
         ulong parentKey = parent.NodeValue.KmerData;
         if (parentKey == newKey)
         {
             // Key already exists.
             return parent;
         }

         if (newKey < parentKey)
         {
             // Descend left, or attach here when the left slot is free.
             if (parent.Left != null)
             {
                 parent = parent.Left;
                 continue;
             }

             DeBruijnNode addedLeft = MakeNewNode(value);
             parent.Left = addedLeft;
             return addedLeft;
         }

         // Descend right, or attach here when the right slot is free.
         if (parent.Right != null)
         {
             parent = parent.Right;
             continue;
         }

         DeBruijnNode addedRight = MakeNewNode(value);
         parent.Right = addedRight;
         return addedRight;
     }
 }
Esempio n. 14
0
        /// <summary>
        /// Removes the edge corresponding to the node from the right extensions,
        /// or from the left extensions when it was not a right extension.
        /// Each collection is modified while holding a lock on that collection.
        /// </summary>
        /// <param name="node">Node for which extension is to be removed</param>
        public void RemoveExtensionThreadSafe(DeBruijnNode node)
        {
            ValidateNode(node);

            lock (_rightEndExtensionNodes)
            {
                if (_rightEndExtensionNodes.Remove(node))
                {
                    // Found among the right extensions; the left side is not touched.
                    return;
                }
            }

            // The right-side lock has been released before the left-side lock is taken.
            lock (_leftEndExtensionNodes)
            {
                _leftEndExtensionNodes.Remove(node);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Looks up the tree node whose value compares equal to the given kmer value.
        /// </summary>
        /// <param name="kmerValue">The kmer value to search for.</param>
        /// <returns>The matching node in the tree, or null when no node matches.</returns>
        public DeBruijnNode SearchTree(KmerData32 kmerValue)
        {
            DeBruijnNode current = Root;
            while (current != null)
            {
                int comparison = kmerValue.CompareTo(current.NodeValue);
                if (comparison == 0)
                {
                    // Found the node holding the requested value.
                    return current;
                }

                if (comparison < 0)
                {
                    // Search left if the value is smaller than the current node.
                    current = current.Left;
                }
                else
                {
                    current = current.Right;
                }
            }

            // Walked off the tree without a match.
            return null;
        }
Esempio n. 16
0
        /// <summary>
        ///     Looks up the tree node whose encoded kmer data equals that of the given value.
        /// </summary>
        /// <param name="kmerValue">The kmer value to search for.</param>
        /// <returns>The matching node in the tree, or null when no node matches.</returns>
        public DeBruijnNode SearchTree(KmerData32 kmerValue)
        {
            DeBruijnNode current = _root;
            while (current != null)
            {
                ulong currentKey = current.NodeValue.KmerData;
                ulong wantedKey = kmerValue.KmerData;
                if (currentKey == wantedKey)
                {
                    // Requested value found.
                    return current;
                }

                // Smaller keys live in the left subtree, larger ones in the right.
                if (wantedKey < currentKey)
                {
                    current = current.Left;
                }
                else
                {
                    current = current.Right;
                }
            }

            return null;
        }
Esempio n. 17
0
 /// <summary>
 /// Lazily enumerates the nodes of the tree in pre-order (node, then left
 /// subtree, then right subtree), skipping nodes flagged as deleted.
 /// Nothing is returned when Count is not positive.
 /// </summary>
 /// <returns>The nodes that are not flagged as deleted.</returns>
 public IEnumerable <DeBruijnNode> GetNodes()
 {
     if (Count <= 0)
     {
         yield break;
     }

     // Initial capacity is log2(Count) slots.
     var pending = new Stack <DeBruijnNode>((int)Math.Log(Count, 2.0));
     pending.Push(_root);
     while (pending.Count != 0)
     {
         DeBruijnNode visiting = pending.Pop();
         if (visiting == null)
         {
             // Missing children are pushed as null and dropped here.
             continue;
         }

         // Right is pushed first so that the left subtree is popped first.
         pending.Push(visiting.Right);
         pending.Push(visiting.Left);
         if (visiting.IsDeleted)
         {
             continue;
         }

         yield return visiting;
     }
 }
Esempio n. 18
0
        /// <summary>
        /// Gets the nodes present in this graph, enumerated lazily in pre-order
        /// (node, then left subtree, then right subtree).
        /// Nodes marked for delete are not returned.
        /// </summary>
        /// <returns>The sequence of all available nodes in the graph.</returns>
        public IEnumerable <DeBruijnNode> GetNodes()
        {
            var traversalStack = new Stack <DeBruijnNode>();

            traversalStack.Push(Root);
            while (traversalStack.Count > 0)
            {
                DeBruijnNode current = traversalStack.Pop();
                if (current == null)
                {
                    // Missing children (or an empty root) are pushed as null and dropped here.
                    continue;
                }

                // Right is pushed first so that the left subtree is popped first.
                traversalStack.Push(current.Right);
                traversalStack.Push(current.Left);
                if (!current.IsDeleted)
                {
                    yield return current;
                }
            }

            // No TrimExcess here: the stack is empty and local to this enumeration,
            // so trimming it only did needless work before it was discarded.
        }
Esempio n. 19
0
        /// <summary>
        /// Gets the next symbol from a node while forming chains.
        /// Chains are extended either with the last base of the node as is, or with
        /// the complement of the first base of the node; the flag selects which.
        /// This can be made a lot more efficient if it turns into a bottleneck.
        /// </summary>
        /// <param name="node">Node to read the symbol from.</param>
        /// <param name="kmerLength">Kmer length passed to the node to obtain its original symbols.</param>
        /// <param name="GetRCofFirstBaseInsteadOfLastBase">
        /// True to return the complement of the first base; false to return the last base unchanged.
        /// </param>
        /// <returns>The selected symbol.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        /// <exception cref="InvalidOperationException">Thrown when the first base has no complement in the DNA alphabet.</exception>
        private static byte GetNextSymbol(DeBruijnNode node, int kmerLength, bool GetRCofFirstBaseInsteadOfLastBase)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            byte[] symbols = node.GetOriginalSymbols(kmerLength);
            if (!GetRCofFirstBaseInsteadOfLastBase)
            {
                return symbols.Last();
            }

            byte complement;
            bool complemented = DnaAlphabet.Instance.TryGetComplementSymbol(symbols.First(), out complement);

            // Should never happen. A specific exception type is thrown instead of the
            // bare System.Exception; callers catching Exception still catch it.
            if (!complemented)
            {
                throw new InvalidOperationException("Could not revcomp base during graph construction");
            }

            return complement;
        }
Esempio n. 20
0
        /// <summary>
        /// Builds a contig graph from kmer graph using contig data information.
        /// Creates one graph node per contig, computes adjacency for the contig
        /// graph, and then replaces the graph's base sequences and node set with
        /// the contigs and the contig nodes.
        /// </summary>
        /// <param name="contigs">List of contig data</param>
        /// <param name="kmerLength">Kmer length</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> is not positive.</exception>
        public void BuildContigGraph(IList <ISequence> contigs, int kmerLength)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            // One node per contig, stored at the contig's own index.
            int contigCount = contigs.Count;
            DeBruijnNode[] nodesByContig = new DeBruijnNode[contigCount];
            Parallel.For(0, contigCount, contigIndex =>
            {
                nodesByContig[contigIndex] = new DeBruijnNode(contigs[contigIndex].Count, contigIndex);
            });

            GenerateContigAdjacency(contigs, kmerLength, nodesByContig);

            // Swap the graph contents over to the contig view.
            _baseSequence = new List <ISequence>(contigs);
            _kmerNodes    = new HashSet <DeBruijnNode>(nodesByContig);
        }
Esempio n. 21
0
        /// <summary>
        /// Flags the right extension slot that holds the given node as invalid.
        /// Slots are checked in order 0 to 3 and only the first match is flagged.
        /// </summary>
        /// <param name="node">Debruijn node expected to match one of the right extensions of the current node.</param>
        /// <returns>True if a right extension slot matched and was flagged; otherwise false.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public bool MarkRightExtensionAsInvalid(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Assume a match; the final branch resets this when no slot holds the node.
            bool matched = true;
            if (RightExtension0 == node)
            {
                InvalidRightExtension0 = true;
            }
            else if (RightExtension1 == node)
            {
                InvalidRightExtension1 = true;
            }
            else if (RightExtension2 == node)
            {
                InvalidRightExtension2 = true;
            }
            else if (RightExtension3 == node)
            {
                InvalidRightExtension3 = true;
            }
            else
            {
                matched = false;
            }

            return matched;
        }
Esempio n. 22
0
        /// <summary>
        /// Removes all the invalid extensions permanently: every extension slot that is
        /// set and flagged invalid is cleared, and the matching right or left
        /// extension count is decremented.
        /// </summary>
        public void PurgeInvalidExtensions()
        {
            // Right extensions, slots 0 to 3. The slot is cleared first; only the
            // count update is done while holding the lock on this instance.
            if (RightExtension0 != null && InvalidRightExtension0)
            {
                RightExtension0 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension1 != null && InvalidRightExtension1)
            {
                RightExtension1 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension2 != null && InvalidRightExtension2)
            {
                RightExtension2 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension3 != null && InvalidRightExtension3)
            {
                RightExtension3 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            // Left extensions, slots 0 to 3, handled the same way.
            if (LeftExtension0 != null && InvalidLeftExtension0)
            {
                LeftExtension0 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension1 != null && InvalidLeftExtension1)
            {
                LeftExtension1 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension2 != null && InvalidLeftExtension2)
            {
                LeftExtension2 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension3 != null && InvalidLeftExtension3)
            {
                LeftExtension3 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 23
0
        /// <summary>
        /// Trace simple path starting from 'node' in specified direction.
        /// When a coverage threshold is set (any value other than -1), the traced path is
        /// only checked against it and its nodes are marked for deletion if the average
        /// kmer count is below the threshold. Otherwise the contig sequence is added to
        /// the output list, if requested.
        /// </summary>
        /// <param name="assembledContigs">List of assembled contigs.</param>
        /// <param name="node">Starting node of contig path.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        /// <param name="createContigSequences">Boolean indicating whether the contig sequences are to be created or not.</param>
        /// <param name="DuplicatesPossible">Boolean indicating if duplicates are possible, true if both the forward and reverse path could be generated</param>
        private void TraceSimplePath(List<ISequence> assembledContigs, DeBruijnNode node, bool isForwardDirection, bool createContigSequences,bool DuplicatesPossible)
        {
            ISequence seedSequence = _graph.GetNodeSequence(node);
            List<byte> contigSymbols = new List<byte>(seedSequence);
            node.IsVisited = true;

            List<DeBruijnNode> path = new List<DeBruijnNode>();
            path.Add(node);

            // The first extension in the requested direction is the first step of the path.
            KeyValuePair<DeBruijnNode, bool> firstStep;
            if (isForwardDirection)
            {
                firstStep = node.GetRightExtensionNodesWithOrientation().First();
            }
            else
            {
                firstStep = node.GetLeftExtensionNodesWithOrientation().First();
            }

            TraceSimplePathLinks(path, contigSymbols, isForwardDirection, firstStep.Value, firstStep.Key, createContigSequences);

            // Check to remove duplicates: when duplicates are possible, the path is dropped
            // if its first node value compares lower than its last node value.
            if (DuplicatesPossible && path[0].NodeValue.CompareTo(path.Last().NodeValue) < 0)
            {
                return;
            }

            // Check contig coverage.
            if (_coverageThreshold != -1)
            {
                // Definition from Velvet Manual: http://helix.nih.gov/Applications/velvet_manual.pdf
                // "k-mer coverage" is how many times a k-mer has been seen among the reads.
                double meanCoverage = path.Average(n => n.KmerCount);
                if (meanCoverage < _coverageThreshold)
                {
                    foreach (DeBruijnNode lowCoverageNode in path)
                    {
                        lowCoverageNode.MarkNodeForDelete();
                    }
                }

                return;
            }

            if (!createContigSequences)
            {
                return;
            }

            // The output list is guarded by a lock on the list itself.
            lock (assembledContigs)
            {
                assembledContigs.Add(new Sequence(seedSequence.Alphabet, contigSymbols.ToArray()));
            }
        }
Esempio n. 24
0
        /// <summary>
        /// Deletes the extension nodes that are marked for deletion: every extension
        /// slot whose node is marked for delete is cleared, and the matching right or
        /// left extension count is decremented.
        /// </summary>
        public void RemoveMarkedExtensions()
        {
            // If this node is itself marked for deletion, ignore it. No need for any update.
            if (IsMarkedForDelete)
            {
                return;
            }

            // Right extensions, slots 0 to 3. The slot is cleared first; only the
            // count update is done while holding the lock on this instance.
            if (RightExtension0 != null && RightExtension0.IsMarkedForDelete)
            {
                RightExtension0 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension1 != null && RightExtension1.IsMarkedForDelete)
            {
                RightExtension1 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension2 != null && RightExtension2.IsMarkedForDelete)
            {
                RightExtension2 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            if (RightExtension3 != null && RightExtension3.IsMarkedForDelete)
            {
                RightExtension3 = null;
                lock (this)
                {
                    RightExtensionNodesCount--;
                }
            }

            // Left extensions, slots 0 to 3, handled the same way.
            if (LeftExtension0 != null && LeftExtension0.IsMarkedForDelete)
            {
                LeftExtension0 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension1 != null && LeftExtension1.IsMarkedForDelete)
            {
                LeftExtension1 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension2 != null && LeftExtension2.IsMarkedForDelete)
            {
                LeftExtension2 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }

            if (LeftExtension3 != null && LeftExtension3.IsMarkedForDelete)
            {
                LeftExtension3 = null;
                lock (this)
                {
                    LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 25
0
        /// <summary>
        /// Validate the DeBruijnNode ctor by passing the kmer and validating 
        /// the node object: k-mers are built from the reads of the configured FASTA file,
        /// a node is created with one left and one right extension, and the node's
        /// counters are compared with the expected values from the test configuration.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeCtor(string nodeName)
        {
            // Expected values are stored as text and compared as text below.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);
            string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.NodeExtensionsCountNode);
            string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmersCountNode);
            string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.LeftNodeExtensionsCountNode);
            string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.RightNodeExtensionsCountNode);

            // Get the input reads and build kmers
            using (FastAParser parser = new FastAParser(filePath))
            {
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build the kmers using this
                this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
                this.SequenceReads.Clear();
                this.SetSequenceReads(sequenceReads.ToList());
                IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                    (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

                // Validate the node creation
                // Create the node from the first k-mer of the first read.
                ISequence seq = this.SequenceReads.First();
                KmerData32 kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode node = new DeBruijnNode(kmerData, 1);
                kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

                // Both neighbours are built from the same k-mer data; one is attached
                // as the left extension and the other as the right extension.
                DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
                DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

                node.SetExtensionNode(false, true, leftnode);
                node.SetExtensionNode(true, true, rightnode);

                // Validate DeBruijnNode class properties.
                Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
                Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
                Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
                Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));
            }

            ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
        }
Esempio n. 26
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// The calling thread produces one node per k-mer into a blocking collection while
        /// a background task consumes them and inserts them into the binary search tree
        /// rooted at <c>root</c>. Sequences containing an ambiguous or gap symbol are skipped.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the k-mer length is not positive.</exception>
        /// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when <paramref name="sequences"/> is empty.</exception>
        public void Build(IEnumerable <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (this.kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            BlockingCollection <DeBruijnNode> kmerDataCollection = new BlockingCollection <DeBruijnNode>();

            // Consumer: drains the collection and inserts every node into the search tree.
            Task buildKmers = Task.Factory.StartNew(() =>
            {
                while (!kmerDataCollection.IsCompleted)
                {
                    DeBruijnNode newNode = null;

                    // Timeout -1 blocks until an item arrives or adding is completed.
                    if (kmerDataCollection.TryTake(out newNode, -1))
                    {
                        // Tree Node Creation

                        // create a new node
                        if (this.root == null)   // first element being added
                        {
                            this.root = newNode; // set node as root of the tree
                            this.NodeCount++;
                            continue;
                        }

                        int result          = 0;
                        DeBruijnNode temp   = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                        {
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            if (result == 0)
                            {
                                // The k-mer already exists: bump its count unless it is capped.
                                if (temp.KmerCount <= 255)
                                {
                                    temp.KmerCount++;
                                }

                                // Always leave the search here. With the break inside the cap
                                // check, a capped node would keep 'temp' and 'result' unchanged
                                // and this loop would never terminate.
                                break;
                            }
                            else if (result > 0) // move to right sub-tree
                            {
                                parent = temp;
                                temp   = temp.Right;
                            }
                            else // move to left sub-tree
                            {
                                parent = temp;
                                temp   = temp.Left;
                            }
                        }

                        // position found (result == 0 means the k-mer was already present)
                        if (result > 0) // add as right child
                        {
                            parent.Right = newNode;
                            NodeCount++;
                        }
                        else if (result < 0) // add as left child
                        {
                            parent.Left = newNode;
                            NodeCount++;
                        }
                    } // End of tree node creation.
                }
            });

            try
            {
                IAlphabet alphabet = sequences.First().Alphabet;

                byte[]         symbolMap        = alphabet.GetSymbolValueMap();
                HashSet <byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
                HashSet <byte> gapSymbols;

                // NOTE(review): gapSymbols is treated as possibly null when the alphabet
                // reports no gap symbols - confirm against the IAlphabet implementation.
                alphabet.TryGetGapSymbols(out gapSymbols);

                // Generate the kmers from the sequences
                foreach (ISequence sequence in sequences)
                {
                    // While more than 2 million nodes are queued, back off in 5 millisecond
                    // steps so that the task can remove some kmers and create the nodes.
                    // This will avoid OutofMemoryException
                    while (kmerDataCollection.Count > 2000000)
                    {
                        System.Threading.Thread.Sleep(5);
                    }

                    long   count            = sequence.Count;
                    byte[] convertedSymbols = new byte[count];
                    bool   skipSequence     = false;

                    for (long index = 0; index < count; index++)
                    {
                        convertedSymbols[index] = symbolMap[sequence[index]];
                        if (ambiguousSymbols.Contains(convertedSymbols[index])
                            || (gapSymbols != null && gapSymbols.Contains(convertedSymbols[index])))
                        {
                            skipSequence = true;
                            break;
                        }
                    }

                    if (skipSequence)
                    {
                        continue;
                    }

                    Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

                    // generate the kmers from each sequence
                    for (long i = 0; i <= count - this.kmerLength; ++i)
                    {
                        IKmerData kmerData    = this.GetNewKmerData();
                        bool      orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                        kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                    }
                }
            }
            finally
            {
                // Always signal completion, even when producing failed; otherwise the
                // consumer task would stay blocked in TryTake forever.
                kmerDataCollection.CompleteAdding();
            }

            Task.WaitAll(buildKmers);

            kmerDataCollection.Dispose();

            // Generate the links
            this.GenerateLinks();
        }
Esempio n. 27
0
        /// <summary>
        /// Checks that attaching a right extension to a DeBruijnNode through
        /// SetExtensionNode() is reflected in its RightExtensionNodesCount.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeAddRightExtension(string nodeName)
        {
            string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            // Read the input sequences and derive the k-mers from them
            using (FastAParser fastaParser = new FastAParser(fastaPath))
            {
                IEnumerable<ISequence> reads = fastaParser.Parse();

                // Configure the k-mer length and the reads used by the test
                this.KmerLength = int.Parse(kmerLengthText, null);
                this.SequenceReads.Clear();
                this.SetSequenceReads(reads.ToList());

                SequenceToKmerBuilder kmerBuilder = new SequenceToKmerBuilder();
                IList<KmersOfSequence> kmersPerRead = new List<KmersOfSequence>(kmerBuilder.Build(this.SequenceReads, this.KmerLength));

                ISequence firstRead = this.SequenceReads.First();

                // Node under test, built from the first k-mer of the first entry
                KmerData32 nodeKmer = new KmerData32();
                nodeKmer.SetKmerData(firstRead, kmersPerRead[0].Kmers.First().Positions[0], this.KmerLength);
                DeBruijnNode node = new DeBruijnNode(nodeKmer, 1);

                // Neighbour node, built from the first k-mer of the second entry
                KmerData32 neighbourKmer = new KmerData32();
                neighbourKmer.SetKmerData(firstRead, kmersPerRead[1].Kmers.First().Positions[0], this.KmerLength);
                DeBruijnNode rightNode = new DeBruijnNode(neighbourKmer, 1);

                // Attach the neighbour on the right and verify the counter
                node.SetExtensionNode(true, true, rightNode);
                Assert.AreEqual(kmersPerRead[1].Kmers.First().Count, node.RightExtensionNodesCount);
            }

            ApplicationLog.WriteLine(@"Padena BVT :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
        }
Esempio n. 28
0
        /// <summary>
        /// Validate RemoveExtension() method of DeBruijnNode: a node is given one left and
        /// one right extension, both are removed with RemoveExtensionThreadSafe(), and the
        /// extension counters are checked before and after the removal.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
              Constants.KmerLengthNode);

            // Get the input reads and build kmers
            IEnumerable<ISequence> sequenceReads = null;
            FastAParser parser = new FastAParser();
            parser.Open(filePath);
            try
            {
                // Materialize the reads while the parser is still open.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                // Close the parser even when parsing fails so the file is not left open.
                parser.Close();
            }

            // Build kmers from step1
            this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
            this.SequenceReads.Clear();

            this.SetSequenceReads(sequenceReads.ToList());
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

            // Validate the node creation
            // Create the node from the first k-mer of the first read.
            ISequence seq = this.SequenceReads.First();
            KmerData32 kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

            DeBruijnNode node = new DeBruijnNode(kmerData, 1);
            kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

            // Two distinct neighbour instances built from the same k-mer data.
            DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
            DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

            node.SetExtensionNode(false, true, leftnode);
            node.SetExtensionNode(true, true, rightnode);

            // Validates count before removing right and left extension nodes.
            Assert.AreEqual(lstKmers[1].Kmers.First().Count,
              node.RightExtensionNodesCount);
            Assert.AreEqual(1, node.RightExtensionNodesCount);
            Assert.AreEqual(1, node.LeftExtensionNodesCount);

            // Remove right and left extension nodes.
            node.RemoveExtensionThreadSafe(rightnode);
            node.RemoveExtensionThreadSafe(leftnode);

            // Validate node after removing right and left extensions.
            Assert.AreEqual(0, node.RightExtensionNodesCount);
            Assert.AreEqual(0, node.LeftExtensionNodesCount);

            ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode RemoveExtension() validation for Padena step2 completed successfully");
        }
Esempio n. 29
0
        /// <summary>
        /// Removes edge corresponding to the node from appropriate data structure,
        /// after checking whether given node is part of left or right extensions.
        /// Thread-safe method.
        /// </summary>
        /// <remarks>
        /// Right extensions are examined before left extensions and only the first
        /// matching slot is cleared. The whole lookup and removal runs inside a single
        /// critical section on this node, so the lock is not released between the
        /// right and left scans. Nothing happens when the node is not an extension.
        /// </remarks>
        /// <param name="node">Node for which extension is to be removed.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
        public void RemoveExtensionThreadSafe(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            lock (this)
            {
                if (this.RightExtension0 == node)
                {
                    this.RightExtension0 = null;
                    this.RightExtensionNodesCount--;
                }
                else if (this.RightExtension1 == node)
                {
                    this.RightExtension1 = null;
                    this.RightExtensionNodesCount--;
                }
                else if (this.RightExtension2 == node)
                {
                    this.RightExtension2 = null;
                    this.RightExtensionNodesCount--;
                }
                else if (this.RightExtension3 == node)
                {
                    this.RightExtension3 = null;
                    this.RightExtensionNodesCount--;
                }
                else if (this.LeftExtension0 == node)
                {
                    this.LeftExtension0 = null;
                    this.LeftExtensionNodesCount--;
                }
                else if (this.LeftExtension1 == node)
                {
                    this.LeftExtension1 = null;
                    this.LeftExtensionNodesCount--;
                }
                else if (this.LeftExtension2 == node)
                {
                    this.LeftExtension2 = null;
                    this.LeftExtensionNodesCount--;
                }
                else if (this.LeftExtension3 == node)
                {
                    this.LeftExtension3 = null;
                    this.LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 30
0
        /// <summary>
        /// Builds the graph from the given sequences. A producer task converts the
        /// sequences into batches of k-mers; the batches are consumed in parallel to create
        /// a node per distinct k-mer (or count a repeated one); finally the links between
        /// the nodes are generated. Sequences whose alphabet is not DNA, or which contain
        /// a gap symbol, are skipped and counted as skipped.
        /// </summary>
        /// <param name="sequences">Input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when KmerLength exceeds KmerData32.MAX_KMER_LENGTH.</exception>
        public void Build(IEnumerable <ISequence> sequences)
        {
            // Size of Kmer List to grab, somewhat arbitrary but want to keep list size below large object threshold, which is ~85 kb
            const int blockSize = 4096;

            // When to add list to blocking collection, most short reads are <=151 bp so this should avoid needing to grow the list
            const int addThreshold = blockSize - 151;

            // When to pause adding
            const int stopAddThreshold = 2000000 / blockSize;

            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (KmerLength > KmerData32.MAX_KMER_LENGTH)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan31);
            }

            // A dictionary mapping k-mers to De Bruijn nodes
            KmerDictionary kmerManager = new KmerDictionary();

            // Create the producer thread.
            var  kmerDataCollection = new BlockingCollection <List <KmerData32> >();
            Task producer           = Task.Factory.StartNew(() =>
            {
                try
                {
                    List <KmerData32> kmerList = new List <KmerData32>(blockSize);

                    IAlphabet alphabet = Alphabets.DNA;
                    HashSet <byte> gapSymbols;
                    alphabet.TryGetGapSymbols(out gapSymbols);

                    // Generate the kmers from the sequences
                    foreach (ISequence sequence in sequences)
                    {
                        // if the sequence alphabet is not of type DNA then ignore it.
                        bool skipSequence = sequence.Alphabet != Alphabets.DNA;

                        // if the sequence contains any gap symbols then ignore the sequence.
                        // NOTE(review): gapSymbols is treated as possibly null when no gap
                        // symbols are reported - confirm against the alphabet implementation.
                        if (!skipSequence && gapSymbols != null)
                        {
                            for (long index = 0; index < sequence.Count; ++index)
                            {
                                if (gapSymbols.Contains(sequence[index]))
                                {
                                    skipSequence = true;
                                    break;
                                }
                            }
                        }

                        if (skipSequence)
                        {
                            Interlocked.Increment(ref _skippedSequencesCount);
                            Interlocked.Increment(ref _processedSequencesCount);
                            continue;
                        }

                        // if the blocking collection holds more than roughly 2 million kmers
                        // (stopAddThreshold batches) wait in 2 second steps
                        // so that the consumers can remove some kmers and create the nodes.
                        // This will avoid OutofMemoryException
                        while (kmerDataCollection.Count > stopAddThreshold)
                        {
                            Task.Delay(TimeSpan.FromSeconds(2)).Wait();
                        }

                        // Convert sequences to k-mers
                        kmerList.AddRange(KmerData32.GetKmers(sequence, KmerLength));

                        // Most reads are <=150 basepairs, so this should avoid having to grow the list
                        // by keeping it below blockSize
                        if (kmerList.Count > addThreshold)
                        {
                            kmerDataCollection.Add(kmerList);
                            kmerList = new List <KmerData32>(blockSize);
                        }
                        Interlocked.Increment(ref _processedSequencesCount);
                    }

                    // Flush the last, partially filled batch; an empty batch carries no work.
                    if (kmerList.Count > 0)
                    {
                        kmerDataCollection.Add(kmerList);
                    }
                }
                finally
                {
                    // Always complete the collection so the consumers below can finish,
                    // even when producing failed.
                    kmerDataCollection.CompleteAdding();
                }
            });

            // Consume k-mers by adding them to the k-mer dictionary as nodes
            Parallel.ForEach(kmerDataCollection.GetConsumingEnumerable(), newKmerList =>
            {
                foreach (KmerData32 newKmer in newKmerList)
                {
                    // Create Vertex
                    DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);

                    // Cheap unlocked check first; the cap is re-checked under the lock
                    // because another thread may have incremented the count in between.
                    if (node.KmerCount <= 255)
                    {
                        lock (node)
                        {
                            if (node.KmerCount <= 255)
                            {
                                node.KmerCount++;
                            }
                        }
                    }
                }
            });

            // Ensure producer exceptions are handled.
            producer.Wait();

            // Done consuming k-mers
            kmerDataCollection.Dispose();

            //NOTE: To speed enumeration make the nodes into an array and dispose of the collection
            _nodeCount = kmerManager.NodeCount;
            _nodes     = kmerManager.GenerateNodeArray();

            // Generate the links
            GenerateLinks(kmerManager);

            // Since we no longer need to search for values set left and right nodes of child array to null
            // so that they are available for GC if no longer needed
            foreach (DeBruijnNode node in _nodes)
            {
                node.Left = node.Right = null;
            }

            GraphBuildCompleted = true;
        }
Esempio n. 31
0
        /// <summary>
        /// Appends <paramref name="nextNode"/> to the contig path unless the path already
        /// contains it (which would close a loop). When sequences are being created, the
        /// symbol contributed by the node is appended to, or inserted at the start of,
        /// the contig sequence depending on the direction.
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Sequence of contig being assembled.</param>
        /// <param name="nextNode">Next node on the path to be added.</param>
        /// <param name="isForwardDirection">Boolean indicating direction.</param>
        /// <param name="isSameOrientation">Boolean indicating orientation.</param>
        /// <param name="createContigSequences">Boolean indicating whether contig sequences are to be created or not.</param>
        /// <returns>True if the node was added; false if it was already on the path.</returns>
        private bool CheckAndAddNode(
            List<DeBruijnNode> contigPath,
            List<byte> contigSequence,
            DeBruijnNode nextNode,
            bool isForwardDirection,
            bool isSameOrientation,
            bool createContigSequences)
        {
            // A node that is already on the path means the link loops back: no update is made.
            bool alreadyOnPath = contigPath.Count != 0 && contigPath.Contains(nextNode);
            if (alreadyOnPath)
            {
                return false;
            }

            contigPath.Add(nextNode);

            // Nothing more to do when only the node path is being collected.
            if (!createContigSequences)
            {
                return true;
            }

            // Extend the contig sequence with the symbol taken from the new node:
            // at the end when walking forward, at the front otherwise.
            byte nextSymbol = _graph.GetNextSymbolFrom(nextNode, isForwardDirection, isSameOrientation);
            int insertAt = isForwardDirection ? contigSequence.Count : 0;
            contigSequence.Insert(insertAt, nextSymbol);

            return true;
        }
Esempio n. 32
0
        /// <summary>
        /// Trace simple path in specified direction.
        /// Starting at <paramref name="node"/>, each node is marked as visited and added to
        /// the path, then the walk moves to the first extension in the travel direction.
        /// The walk stops when a node has no extension in that direction or when the
        /// node is already on the path (a loop).
        /// </summary>
        /// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
        /// <param name="contigSequence">Sequence of contig being assembled.</param>
        /// <param name="isForwardDirection">Boolean indicating direction of path.</param>
        /// <param name="sameOrientation">Path orientation.</param>
        /// <param name="node">Next node on the path.</param>
        /// <param name="createContigSequences">Indicates whether the contig sequences are to be created or not.</param>
        private void TraceSimplePathLinks(
            List<DeBruijnNode> contigPath,
            List<byte> contigSequence,
            bool isForwardDirection,
            bool sameOrientation,
            DeBruijnNode node,
            bool createContigSequences)
        {
            while (true)
            {
                node.IsVisited = true;

                // Get extensions going in same directions.
                Dictionary<DeBruijnNode, bool> sameDirectionExtensions = (isForwardDirection ^ sameOrientation) 
                    ? node.GetLeftExtensionNodesWithOrientation() 
                    : node.GetRightExtensionNodesWithOrientation();

                // Add this node to the path; false means it is already there (loop).
                bool added = CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences);

                if (sameDirectionExtensions.Count == 0)
                {
                    // Found end of path.
                    return;
                }

                if (!added)
                {
                    // Loop is found. Cannot extend simple path further.
                    // The expected case is a circle that closes on the first node of the
                    // path, but the trace must end for any repeated node: neither the path
                    // nor the current node changes here, so continuing would never terminate.
                    return;
                }

                // Continue the trace in the same direction through the first extension.
                var sameDirectionExtension = sameDirectionExtensions.First();
                node = sameDirectionExtension.Key;
                sameOrientation =
                    !(sameOrientation ^ sameDirectionExtension.Value);
            }
        }
Esempio n. 33
0
        /// <summary>
        /// Links every node of the graph to its neighbours. For each node, the four
        /// candidate k-mers on the right and on the left are looked up in the tree,
        /// first as written and, if not found, as the matching reverse-complement
        /// candidate; every hit is registered as an extension of the node.
        /// </summary>
        private void GenerateLinks()
        {
            Parallel.ForEach(
                this.GetNodes(),
                node =>
            {
                // Scratch k-mer reused for every lookup made for this node
                IKmerData probe = GetNewKmerData();

                // Text of the stored k-mer and of its reverse complement
                bool storedForward  = node.NodeDataOrientation;
                string storedText   = Encoding.Default.GetString(node.NodeValue.GetKmerData(this.kmerLength));
                string storedTextRC = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(this.KmerLength));

                // The orientation flag decides which of the two texts is taken as the k-mer itself
                string kmerText   = storedForward ? storedText : storedTextRC;
                string kmerTextRC = storedForward ? storedTextRC : storedText;

                // Right Extensions: drop the first symbol and append each candidate symbol
                string rightStem   = kmerText.Substring(1);
                string rightStemRC = kmerTextRC.Substring(0, kmerLength - 1);
                for (int symbolIndex = 0; symbolIndex < DnaSymbols.Length; symbolIndex++)
                {
                    string candidate = rightStem + DnaSymbols[symbolIndex];
                    probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                    DeBruijnNode neighbour = this.SearchTree(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNodes(true, neighbour.NodeDataOrientation, neighbour);
                        continue;
                    }

                    // Not found as written: try the reverse-complement candidate
                    string candidateRC = DnaSymbolsComplement[symbolIndex] + rightStemRC;
                    probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                    neighbour = this.SearchTree(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNodes(true, !neighbour.NodeDataOrientation, neighbour);
                    }
                }

                // Left Extensions: drop the last symbol and prepend each candidate symbol
                string leftStem   = kmerText.Substring(0, kmerLength - 1);
                string leftStemRC = kmerTextRC.Substring(1);
                for (int symbolIndex = 0; symbolIndex < DnaSymbols.Length; symbolIndex++)
                {
                    string candidate = DnaSymbols[symbolIndex] + leftStem;
                    probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                    DeBruijnNode neighbour = this.SearchTree(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNodes(false, neighbour.NodeDataOrientation, neighbour);
                        continue;
                    }

                    // Not found as written: try the reverse-complement candidate
                    string candidateRC = leftStemRC + DnaSymbolsComplement[symbolIndex];
                    probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                    neighbour = this.SearchTree(probe);
                    if (neighbour != null)
                    {
                        node.SetExtensionNodes(false, !neighbour.NodeDataOrientation, neighbour);
                    }
                }
            });
        }
Esempio n. 34
0
        /// <summary>
        /// Stores <paramref name="extensionNode"/> in the first free extension slot
        /// (0 to 3) on the requested side of this node, records the orientation of the
        /// edge, and increments the extension count of that side. A null extension node
        /// is ignored. The update runs under a lock on this node.
        /// </summary>
        /// <param name="isForwardDirection">True indicates Right extension and false indicates left extension.</param>
        /// <param name="sameOrientation">Orientation of the connecting edge.</param>
        /// <param name="extensionNode">Node to which the extension is to be set.</param>
        /// <exception cref="ArgumentException">Thrown when all four slots on the requested side are occupied.</exception>
        public void SetExtensionNode(bool isForwardDirection, bool sameOrientation, DeBruijnNode extensionNode)
        {
            if (extensionNode != null)
            {
                lock (this)
                {
                    if (!isForwardDirection)
                    {
                        // Left side: fill the first empty slot
                        if (this.LeftExtension0 == null)
                        {
                            this.LeftExtension0 = extensionNode;
                            this.OrientationLeftExtension0 = sameOrientation;
                        }
                        else if (this.LeftExtension1 == null)
                        {
                            this.LeftExtension1 = extensionNode;
                            this.OrientationLeftExtension1 = sameOrientation;
                        }
                        else if (this.LeftExtension2 == null)
                        {
                            this.LeftExtension2 = extensionNode;
                            this.OrientationLeftExtension2 = sameOrientation;
                        }
                        else if (this.LeftExtension3 == null)
                        {
                            this.LeftExtension3 = extensionNode;
                            this.OrientationLeftExtension3 = sameOrientation;
                        }
                        else
                        {
                            throw new ArgumentException("Can't set more than four extensions.");
                        }

                        // Reached only when a slot was filled
                        this.LeftExtensionNodesCount += 1;
                        return;
                    }

                    // Right side: fill the first empty slot
                    if (this.RightExtension0 == null)
                    {
                        this.RightExtension0 = extensionNode;
                        this.OrientationRightExtension0 = sameOrientation;
                    }
                    else if (this.RightExtension1 == null)
                    {
                        this.RightExtension1 = extensionNode;
                        this.OrientationRightExtension1 = sameOrientation;
                    }
                    else if (this.RightExtension2 == null)
                    {
                        this.RightExtension2 = extensionNode;
                        this.OrientationRightExtension2 = sameOrientation;
                    }
                    else if (this.RightExtension3 == null)
                    {
                        this.RightExtension3 = extensionNode;
                        this.OrientationRightExtension3 = sameOrientation;
                    }
                    else
                    {
                        throw new ArgumentException("Can't set more than four extensions.");
                    }

                    // Reached only when a slot was filled
                    this.RightExtensionNodesCount += 1;
                }
            }
        }
Esempio n. 35
0
 /// <summary>
 /// Creates a DeBruijnPath whose node list initially holds only the given node.
 /// </summary>
 /// <param name="node">Graph node.</param>
 public DeBruijnPath(DeBruijnNode node)
 {
     // Fill the list before publishing it to the field.
     var initialNodes = new List<DeBruijnNode>();
     initialNodes.Add(node);
     this.path = initialNodes;
 }
Esempio n. 36
0
        /// <summary>
        /// Validates the DeBruijnNode ctor: reads the sequences from the configured FastA file,
        /// builds their kmers, creates two nodes from the first kmer of the first two kmer sets,
        /// attaches the second node as a left extension of the first and checks the resulting
        /// left extension count.
        /// </summary>
        /// <param name="nodeName">xml node name used for different testcases</param>
        internal void ValidateDeBruijnNodeCtor(string nodeName)
        {
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            // Get the input reads. The parser is closed even when parsing fails so the
            // underlying file is not left open by a failing test.
            FastAParser parser = new FastAParser();
            parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
            IEnumerable<ISequence> sequenceReads;
            try
            {
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            }
            finally
            {
                parser.Close();
            }

            // Build the kmers using assembler
            this.KmerLength = int.Parse(kmerLength, null);
            this.SequenceReads.Clear();
            this.SetSequenceReads(sequenceReads.ToList());
            IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>((new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

            // Validate the node creation
            // Create node and add left node.
            ISequence seq = this.SequenceReads.First();
            KmerData32 kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

            DeBruijnNode node = new DeBruijnNode(kmerData, 1);
            kmerData = new KmerData32();
            kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

            DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
            node.SetExtensionNode(false, true, leftnode);

            Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.LeftExtensionNodesCount);

            ApplicationLog.WriteLine(
                "Padena BVT : DeBruijnNode ctor() validation for Padena step2 completed successfully");
        }
Esempio n. 37
0
        /// <summary>
        /// Registers a node as an extension of the current node, storing it in the first
        /// unused of the four extension slots on the requested side together with the
        /// orientation of the connecting edge. A null extension node is ignored.
        /// </summary>
        /// <param name="isForwardDirection">True indicates Right extension and false indicates left extension.</param>
        /// <param name="sameOrientation">Orientation of the connecting edge.</param>
        /// <param name="extensionNode">Node to which the extension is to be set.</param>
        /// <exception cref="ArgumentException">Thrown when all four slots on the requested side are already in use.</exception>
        public void SetExtensionNode(bool isForwardDirection, bool sameOrientation, DeBruijnNode extensionNode)
        {
            if (extensionNode == null)
            {
                return;
            }

            lock (this)
            {
                if (isForwardDirection)
                {
                    // Probe the right slots in order and take the first empty one.
                    if (this.RightExtension0 == null)
                    {
                        this.RightExtension0 = extensionNode;
                        this.OrientationRightExtension0 = sameOrientation;
                    }
                    else if (this.RightExtension1 == null)
                    {
                        this.RightExtension1 = extensionNode;
                        this.OrientationRightExtension1 = sameOrientation;
                    }
                    else if (this.RightExtension2 == null)
                    {
                        this.RightExtension2 = extensionNode;
                        this.OrientationRightExtension2 = sameOrientation;
                    }
                    else if (this.RightExtension3 == null)
                    {
                        this.RightExtension3 = extensionNode;
                        this.OrientationRightExtension3 = sameOrientation;
                    }
                    else
                    {
                        throw new ArgumentException("Can't set more than four extensions.");
                    }

                    // Only reached when a slot was actually filled.
                    this.RightExtensionNodesCount++;
                    return;
                }

                // Probe the left slots in order and take the first empty one.
                if (this.LeftExtension0 == null)
                {
                    this.LeftExtension0 = extensionNode;
                    this.OrientationLeftExtension0 = sameOrientation;
                }
                else if (this.LeftExtension1 == null)
                {
                    this.LeftExtension1 = extensionNode;
                    this.OrientationLeftExtension1 = sameOrientation;
                }
                else if (this.LeftExtension2 == null)
                {
                    this.LeftExtension2 = extensionNode;
                    this.OrientationLeftExtension2 = sameOrientation;
                }
                else if (this.LeftExtension3 == null)
                {
                    this.LeftExtension3 = extensionNode;
                    this.OrientationLeftExtension3 = sameOrientation;
                }
                else
                {
                    throw new ArgumentException("Can't set more than four extensions.");
                }

                // Only reached when a slot was actually filled.
                this.LeftExtensionNodesCount++;
            }
        }
Esempio n. 38
0
        /// <summary>
        /// Compact the node list by removing deleted nodes: undeleted nodes located at or
        /// after position _nodeCount are moved into the slots of deleted nodes located
        /// before it, and the list is then trimmed to _nodeCount entries.
        /// </summary>
        /// <exception cref="AggregateException">Thrown by the task wait when one of the worker
        /// tasks fails, e.g. when the number of deleted front slots does not match the number
        /// of undeleted back nodes.</exception>
        private void CompactDeletedNodesFromBigList()
        {
            //NOTE: Same method as CompactDeletedNodesFromList but using long instead of int
            //start 3 tasks, one to find indexes to fill, one to find things to fill them with, and one to do the filling
            var lnodes = _nodes as BigList <DeBruijnNode>;

            if (lnodes == null)
            {
                throw new Exception("Tried to use node collection as list when it was null or another type");
            }

            BlockingCollection <long>         deletedFrontIndexes = new BlockingCollection <long>();
            BlockingCollection <DeBruijnNode> undeletedBackNodes  = new BlockingCollection <DeBruijnNode>();
            //task to find empty spots in top of list
            long emptySpotsFound       = 0;
            Task findEmptyFrontIndexes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    for (long curForward = 0; curForward < _nodeCount; curForward++)
                    {
                        DeBruijnNode cnode = lnodes[curForward];
                        if (cnode.IsDeleted)
                        {
                            deletedFrontIndexes.Add(curForward);
                            emptySpotsFound++;
                        }
                    }
                }
                finally
                {
                    // Always signal completion, even if the scan fails, so the mover task cannot block forever.
                    deletedFrontIndexes.CompleteAdding();
                    Thread.EndCriticalRegion();
                }
            });
            //task to find undeleted nodes in back of list
            long filledSpotsFound    = 0;
            Task findFullBackIndexes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    try
                    {
                        for (long curBackward = (lnodes.Count - 1); curBackward >= _nodeCount; curBackward--)
                        {
                            DeBruijnNode cnode = lnodes[curBackward];
                            if (!cnode.IsDeleted)
                            {
                                undeletedBackNodes.Add(cnode);
                                filledSpotsFound++;
                            }
                        }
                    }
                    finally
                    {
                        // Always signal completion, even if the scan fails, so the mover task cannot block forever.
                        undeletedBackNodes.CompleteAdding();
                    }

                    findEmptyFrontIndexes.Wait();
                    //This will prevent the program from hanging if a bad area is found in the code so that there is nothing to fill an index
                    if (emptySpotsFound != filledSpotsFound)
                    {
                        throw new Exception("The node array in the graph has become corrupted, node count does not match the number of undeleted nodes");
                    }
                }
                finally
                {
                    Thread.EndCriticalRegion();
                }
            });
            //task to move things that have been found in the back to the front
            Task moveNodes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    // Consume indexes until the producer completes. Checking IsCompleted and then
                    // taking is racy: the collection may complete in between, in which case the
                    // take fails and default values (index 0, null node) would be written to the list.
                    foreach (long index in deletedFrontIndexes.GetConsumingEnumerable())
                    {
                        DeBruijnNode tm;
                        if (!undeletedBackNodes.TryTake(out tm, Timeout.Infinite))
                        {
                            // No node left to fill this slot: the counts do not match.
                            // findFullBackIndexes reports that condition, so just stop moving.
                            break;
                        }

                        lnodes[index] = tm;
                    }
                }
                finally
                {
                    Thread.EndCriticalRegion();
                }
            });

            try
            {
                Task.WaitAll(new Task[] { findEmptyFrontIndexes, findFullBackIndexes, moveNodes });
            }
            finally
            {
                // WaitAll only returns or throws once all three tasks have finished, so nothing uses the collections any more.
                deletedFrontIndexes.Dispose();
                undeletedBackNodes.Dispose();
            }

            //now the tail should only be deleted nodes and nodes that have been copied further up in the list
            lnodes.TrimToSize(_nodeCount);
        }
Esempio n. 39
0
        /// <summary>
        /// Returns a boundary symbol of the node's kmer: the last symbol when isForwardDirection
        /// is true, the first symbol otherwise. The symbols are read from the original kmer when
        /// isSameOrientation is true and from its reverse complement when it is false.
        /// </summary>
        /// <param name="node">DeBruijn node.</param>
        /// <param name="isForwardDirection">Flag to indicate whether the node is in forward direction or not.</param>
        /// <param name="isSameOrientation">Flag to indicate the orientation.</param>
        /// <returns>Byte representing the symbol.</returns>
        /// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
        public byte GetNextSymbolFrom(DeBruijnNode node, bool isForwardDirection, bool isSameOrientation)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // Pick the strand the symbol is read from.
            byte[] symbols;
            if (isSameOrientation)
            {
                symbols = node.GetOriginalSymbols(KmerLength);
            }
            else
            {
                symbols = node.GetReverseComplementOfOriginalSymbols(KmerLength);
            }

            // Pick the end of the kmer that faces the direction of travel.
            if (isForwardDirection)
            {
                return symbols.Last();
            }

            return symbols.First();
        }
Esempio n. 40
0
        /// <summary>
        /// Removes all the invalid extensions permanently: every occupied extension slot whose
        /// invalid flag is set is cleared and the matching extension count is decremented.
        /// </summary>
        public void PurgeInvalidExtensions()
        {
            // The slot check, the slot reset and the counter update are done under one lock so
            // that a slot and its counter are never observed out of sync and two concurrent
            // purges cannot decrement the counter twice for the same slot.
            lock (this)
            {
                if (this.RightExtension0 != null && this.InvalidRightExtension0)
                {
                    this.RightExtension0 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension1 != null && this.InvalidRightExtension1)
                {
                    this.RightExtension1 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension2 != null && this.InvalidRightExtension2)
                {
                    this.RightExtension2 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension3 != null && this.InvalidRightExtension3)
                {
                    this.RightExtension3 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.LeftExtension0 != null && this.InvalidLeftExtension0)
                {
                    this.LeftExtension0 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension1 != null && this.InvalidLeftExtension1)
                {
                    this.LeftExtension1 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension2 != null && this.InvalidLeftExtension2)
                {
                    this.LeftExtension2 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension3 != null && this.InvalidLeftExtension3)
                {
                    this.LeftExtension3 = null;
                    this.LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 41
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <param name="sequences">List of input sequences.</param>
        /// <param name="destroyKmerManagerAfterwards">MT Assembler specific flag. When true (the default)
        /// the k-mer manager is dropped and the Left/Right references of every node are cleared once the
        /// links have been generated; when false the manager is kept in KmerManager.</param>
        /// <exception cref="ArgumentNullException">Thrown when sequences is null.</exception>
        public void Build(IEnumerable <ISequence> sequences, bool destroyKmerManagerAfterwards = true)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            // Build the dictionary of kmers to debruijin nodes
            var kmerManager        = new KmerDictionary();
            var kmerDataCollection = new BlockingCollection <List <KmerData32> >();
            // Create the producer task
            Task theProducer = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    int i        = 0;
                    var kmerList = new List <KmerData32>(BlockSize);
                    // Generate the kmers from the sequences
                    foreach (ISequence sequence in sequences)
                    {
#if DEBUG
                        i++;
                        if (i % 50000 == 0)
                        {
                            //TODO: This is reported each 5 minutes anyway.
                            Console.WriteLine("Parsed: " + i.ToString() + " reads");
                        }
#endif
                        // if the sequence alphabet is not of type DNA, or the sequence is shorter than the kmer length, ignore it.
                        bool skipSequence = false;
                        if (sequence.Alphabet != Alphabets.NoGapDNA || sequence.Count < _kmerLength)
                        {
                            skipSequence = true;
#if FALSE
                            Console.WriteLine(sequence.Alphabet.ToString());
                            var qs            = sequence as Sequence;
                            var f             = new Sequence(qs);
                            var s             = f.ConvertToString();
                            byte[] acceptable = new byte[] { 65, 67, 71, 84 };
                            var s3            = new Sequence(qs.Alphabet, f.Where(x => !acceptable.Contains(x)).ToArray());

                            Console.WriteLine("BAD: " + s3.ConvertToString());
                            Console.WriteLine(f.ConvertToString());

                            //	var b = sequence as Sequence;
                            //Console.WriteLine((sequence as Sequence).ConvertToString());
#endif
                        }
                        if (skipSequence)
                        {
                            Interlocked.Increment(ref this._skippedSequencesCount);
                            Interlocked.Increment(ref this._processedSequencesCount);
                            continue;
                        }

                        // If the number of queued blocks exceeds StopAddThreshold, pause briefly (2 ms)
                        // and re-check so that the consumers can drain the queue and create the nodes.
                        // This will avoid OutofMemoryException
                        while (kmerDataCollection.Count > StopAddThreshold)
                        {
                            Thread.Sleep(2);
                        }

                        // Convert sequences to k-mers
                        var kmers = KmerData32.GetKmers(sequence, this.KmerLength);
                        kmerList.AddRange(kmers);

                        // Most reads are <=150 basepairs, so this should avoid having to grow the list
                        // by keeping it below blockSize
                        if (kmerList.Count > AddThreshold)
                        {
                            kmerDataCollection.Add(kmerList);
                            kmerList = new List <KmerData32>(BlockSize);
                        }

                        Interlocked.Increment(ref this._processedSequencesCount);
                    }

                    // Flush the last, partially filled block. Full blocks were already queued inside
                    // the loop, so only a non-empty remainder needs to be handed over.
                    if (kmerList.Count > 0)
                    {
                        kmerDataCollection.Add(kmerList);
                    }
                }
                finally
                {
                    kmerDataCollection.CompleteAdding();
                    // Leave the critical region exactly once, matching the single BeginCriticalRegion
                    // above, whether the producer finished normally or failed.
                    Thread.EndCriticalRegion();
                }
            });

            if (true)// (!Bio.CrossPlatform.Environment.RunningInMono)
            {
                // Consume k-mers by adding them to binary tree structure as nodes
                Parallel.ForEach(kmerDataCollection.GetConsumingEnumerable(),
                                 new ParallelOptions()
                {
                    MaxDegreeOfParallelism = Environment.ProcessorCount
                }, newKmerList =>
                {
                    foreach (KmerData32 newKmer in newKmerList)
                    {
                        // Create Vertex
                        DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);
                        Debug.Assert(newKmer.KmerData == node.NodeValue.KmerData);
                    }
                });
            }
            else
            {
                // Sequential fallback, currently disabled by the constant condition above.
                foreach (var newKmerList in kmerDataCollection.GetConsumingEnumerable())
                {
                    foreach (KmerData32 newKmer in newKmerList)
                    {
                        // Create Vertex
                        DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);
                        Debug.Assert(newKmer.KmerData == node.NodeValue.KmerData);
                    }
                }
            }
            // Done filling binary tree
            theProducer.Wait(); // Make sure task is finished - also rethrows any exception here.
            kmerDataCollection.Dispose();

            // NOTE: To speed enumeration make the nodes into an array and dispose of the collection
            this._nodeCount = kmerManager.NodeCount;
            this._nodes     = kmerManager.GenerateNodeArray();

            // Generate the links
            this.GenerateLinks(kmerManager);

            if (destroyKmerManagerAfterwards)
            {
                // Since we no longer need to search for values delete tree structure, also set left and right nodes of child array to null
                // So that they are available for GC if no longer needed
                kmerManager = null;
                foreach (DeBruijnNode node in _nodes)
                {
                    node.Left  = null;
                    node.Right = null;
                }
            }
            else
            {
                KmerManager = kmerManager;
            }
            this.GraphBuildCompleted = true;
        }
Esempio n. 42
0
        /// <summary>
        /// Starting from a potential end of a dangling link, walks along the extension
        /// edges of the graph, adding the visited nodes to the link, until the other end
        /// of the link, a point of ambiguity, a loop or the length threshold is reached.
        /// Parallelization Note: the only lock taken here guards the list of follow-up
        /// tasks; nodes may be marked for deletion through CheckAndAddDanglingNode.
        /// </summary>
        /// <param name="isForwardDirection">Boolean indicating direction of dangling link.</param>
        /// <param name="link">Dangling Link.</param>
        /// <param name="node">Node that is next on the link.</param>
        /// <param name="sameOrientation">Orientation of link.</param>
        /// <returns>
        /// The traced link, or null when a follow-up task was queued in danglingLinkExtensionTasks
        /// or when CheckAndAddDanglingNode returned null.
        /// </returns>
        private DeBruijnPath TraceDanglingExtensionLink(bool isForwardDirection, DeBruijnPath link, DeBruijnNode node, bool sameOrientation)
        {
            while (true)
            {
                // Split the extensions of the current node into those that continue the
                // walk and those that point back against it.
                int aheadCount;
                int behindCount;
                Dictionary<DeBruijnNode, bool> aheadExtensions;

                bool walkThroughLeft = isForwardDirection ^ sameOrientation;
                if (!walkThroughLeft)
                {
                    aheadCount = node.RightExtensionNodesCount;
                    behindCount = node.LeftExtensionNodesCount;
                    aheadExtensions = node.GetRightExtensionNodesWithOrientation();
                }
                else
                {
                    aheadCount = node.LeftExtensionNodesCount;
                    behindCount = node.RightExtensionNodesCount;
                    aheadExtensions = node.GetLeftExtensionNodesWithOrientation();
                }

                bool traceEnded;
                if (aheadCount == 0)
                {
                    // Found other end of dangling link
                    return CheckAndAddDanglingNode(link, node, out traceEnded);
                }

                if (behindCount > 1)
                {
                    // Point of ambiguity: the link is handed back without adding this node.
                    if (this.erodeThreshold == -1 || node.IsMarkedForDelete)
                    {
                        return link;
                    }

                    lock (this.danglingLinkExtensionTasks)
                    {
                        // Queue a task that resumes the extension from this node later on.
                        this.danglingLinkExtensionTasks.Add(new Task<int>(_ => 
                            ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, false), TaskCreationOptions.None));
                    }

                    return null;
                }

                // Both remaining cases start by trying to add this node to the link.
                link = CheckAndAddDanglingNode(link, node, out traceEnded);

                if (aheadCount > 1)
                {
                    // Point of ambiguity: the link is handed back with this node already processed.
                    if (this.erodeThreshold == -1 || traceEnded || node.IsMarkedForDelete)
                    {
                        return link;
                    }

                    lock (this.danglingLinkExtensionTasks)
                    {
                        this.danglingLinkExtensionTasks.Add(new Task<int>(_ => 
                            ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, true), TaskCreationOptions.None));
                    }

                    return null;
                }

                // Exactly one extension ahead and at most one behind.
                if (traceEnded)
                {
                    // Loop is found or threshold length has been exceeded.
                    return link;
                }

                // Step to the single extension ahead; the orientation stays "same" only
                // when the edge orientation agrees with the current one.
                KeyValuePair<DeBruijnNode, bool> nextStep = aheadExtensions.First();
                node = nextStep.Key;
                sameOrientation = sameOrientation == nextStep.Value;
            }
        }
Esempio n. 43
0
        /// <summary>
        /// Compacts the node list by removing deleted nodes: undeleted nodes located at or
        /// after position _nodeCount are moved into the slots of deleted nodes located
        /// before it, and everything from position _nodeCount onwards is then removed.
        /// </summary>
        /// <exception cref="NullReferenceException">Thrown when the node collection is null or not a list.</exception>
        /// <exception cref="AggregateException">Thrown by the task wait when one of the worker
        /// tasks fails, e.g. when the number of deleted front slots does not match the number
        /// of undeleted back nodes.</exception>
        private void CompactDeletedNodesFromList()
        {
            //start 3 tasks, one to find indexes to fill, one to find things to fill them with, and one to do the filling
            var lnodes = _nodes as List <DeBruijnNode>;

            if (lnodes == null)
            {
                throw new NullReferenceException("Tried to use node collection as list when it was null or another type");
            }
            BlockingCollection <int>          deletedFrontIndexes = new BlockingCollection <int>();
            BlockingCollection <DeBruijnNode> undeletedBackNodes  = new BlockingCollection <DeBruijnNode>();
            int spotsToFind = lnodes.Count - (int)_nodeCount;
            //task to find empty spots in top of list
            int  emptySpotsFound       = 0;
            Task findEmptyFrontIndexes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    for (int curForward = 0; curForward < _nodeCount && emptySpotsFound != spotsToFind; curForward++)
                    {
                        DeBruijnNode cnode = lnodes[curForward];
                        if (cnode.IsDeleted)
                        {
                            deletedFrontIndexes.Add(curForward);
                            emptySpotsFound++;
                        }
                    }
                }
                finally
                {
                    // Always signal completion, even if the scan fails, so the mover task cannot block forever.
                    deletedFrontIndexes.CompleteAdding();
                    Thread.EndCriticalRegion();
                }
            });
            //task to find undeleted nodes in back of list
            int  filledSpotsFound    = 0;
            Task findFullBackIndexes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    try
                    {
                        for (int curBackward = (lnodes.Count - 1); curBackward >= _nodeCount && filledSpotsFound != spotsToFind; curBackward--)
                        {
                            DeBruijnNode cnode = lnodes[curBackward];
                            if (!cnode.IsDeleted)
                            {
                                undeletedBackNodes.Add(cnode);
                                filledSpotsFound++;
                            }
                        }
                    }
                    finally
                    {
                        // Always signal completion, even if the scan fails, so the mover task cannot block forever.
                        undeletedBackNodes.CompleteAdding();
                    }

                    findEmptyFrontIndexes.Wait();
                    //This will prevent the program from hanging if a bad area is found in the code so that there is nothing to fill an index
                    if (emptySpotsFound != filledSpotsFound)
                    {
                        throw new ApplicationException("The node array in the graph has become corrupted, node count does not match the number of undeleted nodes");
                    }
                }
                finally
                {
                    Thread.EndCriticalRegion();
                }
            });
            //task to move things that have been found in the back to the front
            Task moveNodes = Task.Factory.StartNew(() =>
            {
                Thread.BeginCriticalRegion();
                try
                {
                    // Consume indexes until the producer completes. Checking IsCompleted and then
                    // calling Take is racy: the collection may complete in between, which makes
                    // Take throw even though nothing is wrong with the list.
                    foreach (int index in deletedFrontIndexes.GetConsumingEnumerable())
                    {
                        DeBruijnNode tm;
                        if (!undeletedBackNodes.TryTake(out tm, Timeout.Infinite))
                        {
                            // No node left to fill this slot: the counts do not match.
                            // findFullBackIndexes reports that condition, so just stop moving.
                            break;
                        }

                        if (tm == null)
                        {
                            throw new NullReferenceException("Cannot move null node!");
                        }

                        lnodes[index] = tm;
                    }
                }
                finally
                {
                    Thread.EndCriticalRegion();
                }
            });

            try
            {
                Task.WaitAll(new Task[] { findEmptyFrontIndexes, findFullBackIndexes, moveNodes });
            }
            finally
            {
                // WaitAll only returns or throws once all three tasks have finished, so nothing uses the collections any more.
                deletedFrontIndexes.Dispose();
                undeletedBackNodes.Dispose();
            }

            //now the tail should only be deleted nodes and nodes that have been copied further up in the list
            lnodes.RemoveRange((int)_nodeCount, lnodes.Count - (int)_nodeCount);
        }
Esempio n. 44
0
        /// <summary>
        /// Decides whether 'node' can become part of the dangling link 'link'.
        /// When erosion is enabled (erodeThreshold is not -1), the link is still empty and the
        /// node's kmer count is below the erode threshold, the node is marked for deletion
        /// instead of being added (or, if it is already marked, the end point flag is set).
        /// Otherwise the node is appended to the link unless the link already contains it
        /// (the link is returned unchanged) or the link has reached LengthThreshold nodes
        /// (null is returned).
        /// </summary>
        /// <param name="link">Dangling link.</param>
        /// <param name="node">Node to be added.</param>
        /// <param name="reachedErrorEndPoint">Set to true when tracing must stop at this node:
        /// the node was already marked for deletion, the link already contains it, or the
        /// length threshold was reached.</param>
        /// <returns>The dangling link, or null when the length threshold was reached.</returns>
        private DeBruijnPath CheckAndAddDanglingNode(DeBruijnPath link, DeBruijnNode node, out bool reachedErrorEndPoint)
        {
            bool erosionCandidate = this.erodeThreshold != -1
                && link.PathNodes.Count == 0
                && node.KmerCount < this.erodeThreshold;

            if (erosionCandidate)
            {
                // An already marked node means we came around a loop: stop here.
                // Otherwise mark it now and let the trace continue.
                reachedErrorEndPoint = node.IsMarkedForDelete;
                if (!reachedErrorEndPoint)
                {
                    node.MarkNodeForDelete();
                }

                return link;
            }

            // A node that is already on the link means there is a loop; the link stays as it is.
            bool alreadyOnLink = link.PathNodes.Contains(node);
            if (alreadyOnLink || link.PathNodes.Count >= LengthThreshold)
            {
                reachedErrorEndPoint = true;

                // Too long to be a dangling link: report that with null.
                return alreadyOnLink ? link : null;
            }

            // No error conditions found. Add node to link.
            link.PathNodes.Add(node);
            reachedErrorEndPoint = false;
            return link;
        }
Esempio n. 45
0
        /// <summary>
        /// Removes the edge to the given node: the right extension slots are searched first,
        /// then the left ones. The first slot holding the node is cleared and the matching
        /// extension count is decremented; nothing happens when no slot holds the node.
        /// Each side is examined while holding the lock on this node.
        /// </summary>
        /// <param name="node">Node for which extension is to be removed.</param>
        /// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
        public void RemoveExtensionThreadSafe(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            bool clearedRight = true;
            lock (this)
            {
                if (this.RightExtension0 == node)
                {
                    this.RightExtension0 = null;
                }
                else if (this.RightExtension1 == node)
                {
                    this.RightExtension1 = null;
                }
                else if (this.RightExtension2 == node)
                {
                    this.RightExtension2 = null;
                }
                else if (this.RightExtension3 == node)
                {
                    this.RightExtension3 = null;
                }
                else
                {
                    clearedRight = false;
                }

                if (clearedRight)
                {
                    this.RightExtensionNodesCount--;
                }
            }

            // A node removed from the right side is not looked up on the left side.
            if (clearedRight)
            {
                return;
            }

            lock (this)
            {
                bool clearedLeft = true;
                if (this.LeftExtension0 == node)
                {
                    this.LeftExtension0 = null;
                }
                else if (this.LeftExtension1 == node)
                {
                    this.LeftExtension1 = null;
                }
                else if (this.LeftExtension2 == node)
                {
                    this.LeftExtension2 = null;
                }
                else if (this.LeftExtension3 == node)
                {
                    this.LeftExtension3 = null;
                }
                else
                {
                    clearedLeft = false;
                }

                if (clearedLeft)
                {
                    this.LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 46
0
        /// <summary>
        /// Attempts to extend a dangling link after the graph has been
        /// cleaned up following erosion, and reports the resulting length.
        /// </summary>
        /// <param name="isForwardDirection">Direction of the dangling link.</param>
        /// <param name="danglingLink">Dangling link to extend.</param>
        /// <param name="node">Node that is next on the link.</param>
        /// <param name="sameOrientation">Orientation of the link.</param>
        /// <param name="removeLast">When true, <paramref name="node"/> is removed
        /// from the link before extending.</param>
        /// <returns>Number of nodes in the link found after extension; 0 when the
        /// trace yields no link, or when the link is empty and the node is not an end-point.</returns>
        private int ExtendDanglingLink(bool isForwardDirection, DeBruijnPath danglingLink, DeBruijnNode node, bool sameOrientation, bool removeLast)
        {
            if (removeLast)
            {
                danglingLink.PathNodes.Remove(node);
            }

            DeBruijnPath tracedLink;
            if (danglingLink.PathNodes.Count != 0)
            {
                // The link still has nodes: keep extending it in the given direction.
                tracedLink = TraceDanglingExtensionLink(isForwardDirection, danglingLink, node, sameOrientation);
            }
            else if (node.RightExtensionNodesCount == 0)
            {
                // Empty link and the node has no right extensions: start a fresh trace from it.
                tracedLink = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
            }
            else if (node.LeftExtensionNodesCount == 0)
            {
                // Empty link and the node has no left extensions: start a fresh trace from it.
                tracedLink = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
            }
            else
            {
                // Empty link and the node is not an end-point: nothing to extend.
                return 0;
            }

            if (tracedLink == null)
            {
                return 0;
            }

            return tracedLink.PathNodes.Count;
        }
Esempio n. 47
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// Sequences whose alphabet is not DNA, or which contain a gap symbol,
        /// are skipped and counted as skipped.
        /// </summary>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the k-mer length is not positive or is greater than 32.</exception>
        public void Build(IEnumerable<ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (this.kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            if (this.kmerLength > 32)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan32);
            }

            // Upper bound on queued k-mers before the producer backs off, and the
            // length of each back-off pause in milliseconds.
            const int MaxPendingKmers = 2000000;
            const int ThrottleSleepMilliseconds = 5;

            // 'using' guarantees the collection is disposed even when one of the
            // tasks fails and Task.WaitAll throws.
            using (BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>())
            {
                // Producer: turns every accepted sequence into k-mer nodes.
                Task createKmers = Task.Factory.StartNew(() =>
                {
                    try
                    {
                        IAlphabet alphabet = Alphabets.DNA;

                        HashSet<byte> gapSymbols;
                        alphabet.TryGetGapSymbols(out gapSymbols);

                        // Generate the kmers from the sequences
                        foreach (ISequence sequence in sequences)
                        {
                            // if the sequence alphabet is not of type DNA then ignore it.
                            if (sequence.Alphabet != Alphabets.DNA)
                            {
                                Interlocked.Increment(ref this.skippedSequencesCount);
                                Interlocked.Increment(ref this.processedSequencesCount);
                                continue;
                            }

                            // if the sequence contains any gap symbols then ignore the sequence.
                            // Single pass over the sequence with a set lookup per symbol.
                            bool skipSequence = false;
                            for (long index = 0; index < sequence.Count; ++index)
                            {
                                if (gapSymbols.Contains(sequence[index]))
                                {
                                    skipSequence = true;
                                    break;
                                }
                            }

                            if (skipSequence)
                            {
                                Interlocked.Increment(ref this.skippedSequencesCount);
                                Interlocked.Increment(ref this.processedSequencesCount);
                                continue;
                            }

                            // While more than MaxPendingKmers k-mers are queued, pause in
                            // 5 millisecond steps so the consumer can drain the queue and
                            // create the nodes. This limits memory growth.
                            while (kmerDataCollection.Count > MaxPendingKmers)
                            {
                                System.Threading.Thread.Sleep(ThrottleSleepMilliseconds);
                            }

                            long count = sequence.Count;

                            // generate the kmers from each sequence
                            for (long i = 0; i <= count - this.kmerLength; ++i)
                            {
                                IKmerData kmerData = this.GetNewKmerData();
                                bool orientation = kmerData.SetKmerData(sequence, i, this.kmerLength);
                                kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                            }

                            Interlocked.Increment(ref this.processedSequencesCount);
                        }
                    }
                    finally
                    {
                        // Always signal completion, even if the producer fails; otherwise
                        // the consumer would block forever waiting for more items and
                        // Task.WaitAll would never return.
                        kmerDataCollection.CompleteAdding();
                    }
                });

                // Consumer: inserts every produced node into the binary search tree.
                Task buildKmers = Task.Factory.StartNew(() =>
                {
                    // Blocks until an item is available and ends once adding is
                    // completed and the collection is empty.
                    foreach (DeBruijnNode newNode in kmerDataCollection.GetConsumingEnumerable())
                    {
                        if (this.root == null)
                        {
                            // first element being added: it becomes the root of the tree
                            this.root = newNode;
                            this.NodeCount++;
                            continue;
                        }

                        int result = 0;
                        DeBruijnNode temp = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                        {
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            if (result == 0)
                            {
                                // Duplicate k-mer: bump the count, saturating at 255.
                                // The search must stop here whether or not the count was
                                // incremented; 'temp' does not advance in this branch, so
                                // without the unconditional break the loop would never end
                                // once the cap is reached.
                                if (temp.KmerCount < byte.MaxValue)
                                {
                                    temp.KmerCount++;
                                }

                                break;
                            }

                            parent = temp;

                            // positive: move to right sub-tree, negative: move to left sub-tree
                            temp = result > 0 ? temp.Right : temp.Left;
                        }

                        // position found ('result' is 0 for a duplicate, so nothing is attached)
                        if (result > 0)
                        {
                            // add as right child
                            parent.Right = newNode;
                            this.NodeCount++;
                        }
                        else if (result < 0)
                        {
                            // add as left child
                            parent.Left = newNode;
                            this.NodeCount++;
                        }
                    }
                });

                Task.WaitAll(createKmers, buildKmers);
            }

            this.GraphBuildCompleted = true;

            // Generate the links
            this.GenerateLinks();
        }
Esempio n. 48
0
        /// <summary>
        /// Flags the extension edge that points to the given node as invalid.
        /// The right extensions are tried first; the left extensions are tried
        /// only when no right extension matched.
        /// Not Thread-safe. Use lock at caller if required.
        /// </summary>
        /// <param name="node">Node for which extension is to be made invalid.</param>
        public void MarkExtensionInvalid(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            bool foundOnRight = this.MarkRightExtensionAsInvalid(node);
            if (foundOnRight)
            {
                return;
            }

            // No right extension matched, so fall back to the left extensions.
            this.MarkLeftExtensionAsInvalid(node);
        }
Esempio n. 49
0
        /// <summary>
        /// Deletes the extension nodes those are marked for deletion.
        /// Every right and left extension slot that references a node marked for
        /// delete is cleared, and the matching extension count is decremented once
        /// per cleared slot. Does nothing when this node itself is marked for delete.
        /// </summary>
        public void RemoveMarkedExtensions()
        {
            // If node is marked for deletion, ignore it. No need for any update.
            if (this.IsMarkedForDelete)
            {
                return;
            }

            // Check, clear and decrement inside one critical section. If only the
            // decrement is locked, another thread that clears the same slot under
            // this monitor can slip in between the check and the decrement, which
            // decrements the counter twice or dereferences a slot that was just
            // set to null.
            lock (this)
            {
                if (this.RightExtension0 != null && this.RightExtension0.IsMarkedForDelete)
                {
                    this.RightExtension0 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension1 != null && this.RightExtension1.IsMarkedForDelete)
                {
                    this.RightExtension1 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension2 != null && this.RightExtension2.IsMarkedForDelete)
                {
                    this.RightExtension2 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.RightExtension3 != null && this.RightExtension3.IsMarkedForDelete)
                {
                    this.RightExtension3 = null;
                    this.RightExtensionNodesCount--;
                }

                if (this.LeftExtension0 != null && this.LeftExtension0.IsMarkedForDelete)
                {
                    this.LeftExtension0 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension1 != null && this.LeftExtension1.IsMarkedForDelete)
                {
                    this.LeftExtension1 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension2 != null && this.LeftExtension2.IsMarkedForDelete)
                {
                    this.LeftExtension2 = null;
                    this.LeftExtensionNodesCount--;
                }

                if (this.LeftExtension3 != null && this.LeftExtension3.IsMarkedForDelete)
                {
                    this.LeftExtension3 = null;
                    this.LeftExtensionNodesCount--;
                }
            }
        }
Esempio n. 50
0
        /// <summary>
        /// Follows a set of diverging paths in one direction, growing every path
        /// by one node per round. Tracing stops when all paths end on the same
        /// node, when some path cannot be extended unambiguously, when a path
        /// loops back on itself, or when the path length threshold is exceeded.
        /// Only in the first case is anything recorded: the converged paths are
        /// added as one path list to the list of redundant paths.
        /// </summary>
        /// <param name="startNode">Node at starting point of divergence.</param>
        /// <param name="divergingNodes">Diverging nodes, each with its orientation flag.</param>
        /// <param name="isForwardExtension">Bool indicating direction of divergence.</param>
        /// <param name="redundantPaths">List of redundant paths; locked while it is updated.</param>
        private void TraceDivergingExtensionPaths(
            DeBruijnNode startNode,
            Dictionary<DeBruijnNode, bool> divergingNodes,
            bool isForwardExtension,
            List<DeBruijnPathList> redundantPaths)
        {
            // One two-node path (start node + diverging node) per diverging node.
            List<PathWithOrientation> candidatePaths = divergingNodes
                .Select(entry => new PathWithOrientation(startNode, entry.Key, entry.Value))
                .ToList();

            // Each round adds one node to every path, so the path length starts at 2
            // and grows by one per round until the threshold is exceeded.
            for (int pathLength = 2; pathLength <= this.pathLengthThreshold; pathLength++)
            {
                // Every path must have exactly one way forward from its last node.
                // Zero extensions means a dead end, more than one means the path
                // diverges further; neither is treated as a redundant path.
                bool everyPathHasSingleExtension = candidatePaths.All(candidate =>
                {
                    DeBruijnNode tail = candidate.Nodes.Last();
                    int extensionCount = (isForwardExtension ^ candidate.IsSameOrientation)
                        ? tail.LeftExtensionNodesCount
                        : tail.RightExtensionNodesCount;
                    return extensionCount == 1;
                });

                if (!everyPathHasSingleExtension)
                {
                    return;
                }

                // Grow each path by its single extension and track whether all of
                // them end on the same node as the first path.
                bool allPathsMeet = true;
                foreach (PathWithOrientation candidate in candidatePaths)
                {
                    DeBruijnNode tail = candidate.Nodes.Last();

                    Dictionary<DeBruijnNode, bool> nextCandidates;
                    if (isForwardExtension ^ candidate.IsSameOrientation)
                    {
                        nextCandidates = tail.GetLeftExtensionNodesWithOrientation();
                    }
                    else
                    {
                        nextCandidates = tail.GetRightExtensionNodesWithOrientation();
                    }

                    KeyValuePair<DeBruijnNode, bool> successor = nextCandidates.First();

                    // The successor is already on this path: loop, give up.
                    if (candidate.Nodes.Contains(successor.Key))
                    {
                        return;
                    }

                    // Orientation stays "same" only when the current flag and the
                    // successor's flag agree.
                    candidate.IsSameOrientation = !(candidate.IsSameOrientation ^ successor.Value);
                    candidate.Nodes.Add(successor.Key);

                    // A last node that differs from the first path's last node
                    // means the paths have not converged in this round.
                    if (allPathsMeet && successor.Key != candidatePaths.First().Nodes.Last())
                    {
                        allPathsMeet = false;
                    }
                }

                if (!allPathsMeet)
                {
                    continue;
                }

                // All paths now share the same end node: record them as redundant.
                lock (redundantPaths)
                {
                    redundantPaths.Add(new DeBruijnPathList(candidatePaths.Select(p => new DeBruijnPath(p.Nodes))));
                }

                return;
            }
        }
Esempio n. 51
0
 /// <summary>
 /// Initializes a new instance of the DeBruijnPath class whose node list
 /// starts out containing only the specified node.
 /// </summary>
 /// <param name="node">Graph node placed first in the path.</param>
 public DeBruijnPath(DeBruijnNode node)
 {
     List<DeBruijnNode> initialNodes = new List<DeBruijnNode>();
     initialNodes.Add(node);
     this.path = initialNodes;
 }
Esempio n. 52
0
        /// <summary>
        /// Builds a DNA sequence from the original symbols of the specified node.
        /// </summary>
        /// <param name="node">DeBruijn node.</param>
        /// <returns>A new DNA sequence created from the node's original symbols.</returns>
        public ISequence GetNodeSequence(DeBruijnNode node)
        {
            if (node != null)
            {
                // The symbols are read from the node using the current k-mer length.
                var originalSymbols = node.GetOriginalSymbols(KmerLength);
                return new Sequence(Alphabets.DNA, originalSymbols);
            }

            throw new ArgumentNullException("node");
        }