/// <summary>
/// Flags the right extension slot that refers to the given node as invalid.
/// Slots 0 through 3 are examined in order and only the first matching slot is flagged.
/// </summary>
/// <param name="node">Node expected to be one of the right extensions of this node.</param>
/// <returns>True if a right extension slot matched and was flagged; otherwise false.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public bool MarkRightExtensionAsInvalid(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // Assume a match; the final branch resets the flag when no slot refers to the node.
    bool matched = true;

    if (this.RightExtension0 == node)
    {
        this.InvalidRightExtension0 = true;
    }
    else if (this.RightExtension1 == node)
    {
        this.InvalidRightExtension1 = true;
    }
    else if (this.RightExtension2 == node)
    {
        this.InvalidRightExtension2 = true;
    }
    else if (this.RightExtension3 == node)
    {
        this.InvalidRightExtension3 = true;
    }
    else
    {
        matched = false;
    }

    return matched;
}
/// <summary>
/// Guard helper that rejects a null node.
/// </summary>
/// <param name="node">Node to check.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
private static void ValidateNode(DeBruijnNode node)
{
    if (node != null)
    {
        return;
    }

    throw new ArgumentNullException("node");
}
/// <summary>
/// Looks up the tree node whose value compares equal to the given k-mer.
/// The search starts at the root and descends left when the k-mer compares
/// smaller than the current node's value and right when it compares larger.
/// </summary>
/// <param name="kmerValue">K-mer value to look for.</param>
/// <returns>The matching node, or null if there is no match or <paramref name="kmerValue"/> is null.</returns>
public DeBruijnNode SearchTree(IKmerData kmerValue)
{
    // A null key is not expected from callers; it is answered as "not found".
    if (kmerValue == null)
    {
        return null;
    }

    DeBruijnNode current = this.root;
    while (current != null)
    {
        int comparison = kmerValue.CompareTo(current.NodeValue);
        if (comparison == 0)
        {
            // Exact match.
            return current;
        }

        if (comparison < 0)
        {
            current = current.Left;
        }
        else
        {
            current = current.Right;
        }
    }

    // Ran off the bottom of the tree without a match.
    return null;
}
/// <summary>
/// Builds the k-mer sequence that a node stands for.
/// The node carries an index into the list of base sequences plus a start position
/// and a length; the k-mer is that range of the indexed base sequence.
/// When the range covers the whole base sequence, the base sequence itself is returned.
/// </summary>
/// <param name="node">Graph node.</param>
/// <returns>Sequence associated with the node.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the node's sequence index or k-mer position does not fit the stored base sequences.
/// </exception>
public ISequence GetNodeSequence(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // Locate the base sequence the node refers to.
    int index = node.SequenceIndex;
    bool indexInRange = index >= 0 && index < _baseSequence.Count;
    if (!indexInRange)
    {
        throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerIndexOutOfRange);
    }

    ISequence source = _baseSequence[index];

    // The k-mer must lie completely inside the base sequence.
    int start = node.KmerPosition;
    if (start < 0 || start + node.KmerLength > source.Count)
    {
        throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerPositionOutOfRange);
    }

    // No need to slice when the k-mer is the entire base sequence.
    if (start == 0 && source.Count == node.KmerLength)
    {
        return source;
    }

    return source.Range(start, node.KmerLength);
}
/// <summary>
/// Creates a DNA sequence from the original symbols stored in the given node,
/// using this graph's k-mer length.
/// </summary>
/// <param name="node">DeBruijn node.</param>
/// <returns>A new sequence over the DNA alphabet holding the node's symbols.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public ISequence GetNodeSequence(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    byte[] symbols = node.GetOriginalSymbols(this.KmerLength);
    return new Sequence(Alphabets.DNA, symbols);
}
/// <summary>
/// Adds the links between the nodes of the graph.
/// For every node, the four candidate k-mers overlapping it on the right and the four
/// overlapping it on the left are looked up in <paramref name="kmerManager"/>; each
/// candidate that is found is registered on the node as an extension.
/// Nodes are processed in parallel, and LinkGenerationCompleted is set once all are done.
/// </summary>
/// <param name="kmerManager">Lookup used to find the node for an encoded k-mer.</param>
private void GenerateLinks(KmerDictionary kmerManager)
{
    // Every base occupies two bits, so the left-most base of the encoded k-mer
    // sits this many bits up from the bottom.
    int leftMostShift = 2 * (KmerLength - 1);

    // AND-mask that clears the two bits of the left-most base.
    ulong dropLeftMostMask = ~(((ulong)3) << leftMostShift);

    Parallel.ForEach(_nodes, node =>
    {
        // Scratch k-mer, overwritten for each of the eight lookups of this node.
        KmerData32 probe = new KmerData32();

        // Right extensions: drop the left-most base and shift up by one base,
        // leaving the two low bits free for the appended base.
        ulong rightStem = (node.NodeValue.KmerData & dropLeftMostMask) << 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            // Equivalent to "CGTA" + N, where N is the 0-3 code of the appended base.
            // SetKmerData reports whether the reverse complement was stored instead of
            // the k-mer itself; that flag is handed on to the link.
            bool matchIsRC = probe.SetKmerData(rightStem | baseCode, KmerLength);
            DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
            if (neighbour != null)
            {
                node.SetExtensionNode(true, matchIsRC, neighbour);
            }
        }

        // Left extensions: chop off the right-most base, which frees the
        // left-most position for the prepended base.
        ulong leftStem = node.NodeValue.KmerData >> 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            // Equivalent to N + "ACGT": the base code goes into the top two bits.
            bool matchIsRC = probe.SetKmerData((baseCode << leftMostShift) | leftStem, KmerLength);
            DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
            if (neighbour != null)
            {
                node.SetExtensionNode(false, matchIsRC, neighbour);
            }
        }
    });

    LinkGenerationCompleted = true;
}
/// <summary>
/// Marks the extension edge that leads to the given node as invalid.
/// The right extensions are consulted first; the left extensions are only
/// consulted when the node is not a right extension. Nothing happens when the
/// node is in neither collection.
/// Not thread-safe. Use a lock at the caller if required.
/// </summary>
/// <param name="node">Node for which the extension is to be made invalid.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void MarkExtensionInvalid(DeBruijnNode node)
{
    ValidateNode(node);

    // A single TryGetValue per dictionary replaces the ContainsKey + indexer pair,
    // which hashed the key twice. The short-circuit keeps "right before left".
    DeBruijnEdge edge;
    if (_rightEndExtensionNodes.TryGetValue(node, out edge) ||
        _leftEndExtensionNodes.TryGetValue(node, out edge))
    {
        edge.IsValid = false;
    }
}
/// <summary>
/// Marks the extension edge that leads to the given node as invalid.
/// The right extensions are tried first; the left extensions are only tried
/// when no right extension matched.
/// Not thread-safe. Use a lock at the caller if required.
/// </summary>
/// <param name="node">Node for which the extension is to be made invalid.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void MarkExtensionInvalid(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    bool handledOnRight = this.MarkRightExtensionAsInvalid(node);
    if (handledOnRight)
    {
        return;
    }

    this.MarkLeftExtensionAsInvalid(node);
}
/// <summary>
/// Picks one end symbol of a node's k-mer.
/// The symbols are the node's original symbols when <paramref name="isSameOrientation"/> is true,
/// otherwise their reverse complement. From those, the last symbol is returned when
/// <paramref name="isForwardDirection"/> is true and the first symbol otherwise.
/// </summary>
/// <param name="node">DeBruijn node.</param>
/// <param name="isForwardDirection">True to take the last symbol, false to take the first.</param>
/// <param name="isSameOrientation">True to read the original symbols, false to read their reverse complement.</param>
/// <returns>Byte representing the symbol.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public byte GetNextSymbolFrom(DeBruijnNode node, bool isForwardDirection, bool isSameOrientation)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    byte[] symbols;
    if (isSameOrientation)
    {
        symbols = node.GetOriginalSymbols(KmerLength);
    }
    else
    {
        symbols = node.GetReverseComplementOfOriginalSymbols(KmerLength);
    }

    if (isForwardDirection)
    {
        return symbols.Last();
    }

    return symbols.First();
}
/// <summary>
/// Adds a right extension edge to the given node.
/// When no edge to the node exists yet, a new edge with the given orientation is stored.
/// When an edge already exists, its orientation flag is XOR-ed with the given orientation.
/// Not thread-safe. Use a lock at the caller if required.
/// </summary>
/// <param name="node">Node to add the right extension to.</param>
/// <param name="isSameOrientation">Orientation of the connecting edge.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void AddRightEndExtension(DeBruijnNode node, bool isSameOrientation)
{
    ValidateNode(node);

    DeBruijnEdge edge;
    if (_rightEndExtensionNodes.TryGetValue(node, out edge))
    {
        // Update the edge TryGetValue already fetched instead of indexing the
        // dictionary a second time.
        // NOTE(review): XOR means adding the same orientation twice clears the flag;
        // kept as in the original - confirm this is the intended merge rule.
        edge.IsSameOrientation ^= isSameOrientation;
    }
    else
    {
        _rightEndExtensionNodes[node] = new DeBruijnEdge(isSameOrientation);
    }
}
/// <summary>
/// Adds the given value to the binary tree unless a node with the same k-mer data
/// already exists, in which case that existing node is returned.
/// Useful when two values that are equal by comparison are not equal by reference.
/// </summary>
/// <param name="value">Value to add.</param>
/// <returns>The node created for the value, or the node already in the tree with the same k-mer data.</returns>
public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
{
    // Empty tree: the new node becomes the root.
    if (_root == null)
    {
        DeBruijnNode first = MakeNewNode(value);
        _root = first;
        return first;
    }

    ulong newKey = value.KmerData;
    DeBruijnNode current = _root;
    for (;;)
    {
        ulong currentKey = current.NodeValue.KmerData;
        if (currentKey == newKey)
        {
            // Key already present.
            return current;
        }

        if (newKey < currentKey)
        {
            // Smaller keys live in the left subtree.
            if (current.Left == null)
            {
                DeBruijnNode created = MakeNewNode(value);
                current.Left = created;
                return created;
            }

            current = current.Left;
        }
        else
        {
            // Larger keys live in the right subtree.
            if (current.Right == null)
            {
                DeBruijnNode created = MakeNewNode(value);
                current.Right = created;
                return created;
            }

            current = current.Right;
        }
    }
}
/// <summary>
/// Initializes a new instance of the PathWithOrientation class
/// with a two-node path and the given orientation.
/// </summary>
/// <param name="node1">First node of the path.</param>
/// <param name="node2">Second node of the path.</param>
/// <param name="orientation">Path orientation.</param>
/// <exception cref="ArgumentNullException">Thrown when either node is null.</exception>
public PathWithOrientation(DeBruijnNode node1, DeBruijnNode node2, bool orientation)
{
    if (node1 == null)
    {
        throw new ArgumentNullException("node1");
    }

    if (node2 == null)
    {
        throw new ArgumentNullException("node2");
    }

    List<DeBruijnNode> initialNodes = new List<DeBruijnNode>();
    initialNodes.Add(node1);
    initialNodes.Add(node2);

    this.nodes = initialNodes;
    this.IsSameOrientation = orientation;
}
/// <summary>
/// Inserts the given value into the binary tree, or returns the node that already
/// holds the same k-mer data.
/// Useful when two values that are equal by comparison are not equal by reference.
/// </summary>
/// <param name="value">Value to add.</param>
/// <returns>The newly created node, or the existing node with the same k-mer data.</returns>
public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
{
    if (_root == null)
    {
        // First value ever added becomes the root.
        DeBruijnNode rootNode = MakeNewNode(value);
        _root = rootNode;
        return rootNode;
    }

    ulong newKey = value.KmerData;
    DeBruijnNode current = _root;
    while (true)
    {
        ulong currentKey = current.NodeValue.KmerData;
        if (currentKey == newKey)
        {
            // Same key: hand back the node that is already stored.
            return current;
        }

        // Smaller keys go left, larger keys go right.
        bool goLeft = newKey < currentKey;
        DeBruijnNode child = goLeft ? current.Left : current.Right;
        if (child != null)
        {
            current = child;
            continue;
        }

        // Free slot found: attach the new node here.
        DeBruijnNode created = MakeNewNode(value);
        if (goLeft)
        {
            current.Left = created;
        }
        else
        {
            current.Right = created;
        }

        return created;
    }
}
/// <summary>
/// Removes the extension edge that leads to the given node.
/// The right extensions are tried first, under a lock on that collection; the left
/// extensions are only tried, under their own lock, when nothing was removed from the right.
/// </summary>
/// <param name="node">Node for which the extension is to be removed.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void RemoveExtensionThreadSafe(DeBruijnNode node)
{
    ValidateNode(node);

    bool removedFromRight;
    lock (_rightEndExtensionNodes)
    {
        removedFromRight = _rightEndExtensionNodes.Remove(node);
    }

    if (removedFromRight)
    {
        return;
    }

    lock (_leftEndExtensionNodes)
    {
        _leftEndExtensionNodes.Remove(node);
    }
}
/// <summary>
/// Looks up the tree node whose value compares equal to the given k-mer,
/// descending from Root: left for smaller values, right for larger ones.
/// </summary>
/// <param name="kmerValue">K-mer value to look for.</param>
/// <returns>The matching node, or null when the tree holds no such value.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    DeBruijnNode current = Root;
    while (current != null)
    {
        int comparison = kmerValue.CompareTo(current.NodeValue);
        if (comparison == 0)
        {
            // Found the value.
            return current;
        }

        if (comparison < 0)
        {
            current = current.Left;
        }
        else
        {
            current = current.Right;
        }
    }

    // Not found.
    return null;
}
/// <summary>
/// Looks up the tree node whose encoded k-mer data equals that of the given value,
/// descending from the root: left for smaller data, right for larger data.
/// </summary>
/// <param name="kmerValue">K-mer value to look for.</param>
/// <returns>The matching node, or null when the tree holds no such value.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    DeBruijnNode current = _root;
    while (current != null)
    {
        ulong currentKey = current.NodeValue.KmerData;
        if (currentKey == kmerValue.KmerData)
        {
            return current;
        }

        if (kmerValue.KmerData < currentKey)
        {
            current = current.Left;
        }
        else
        {
            current = current.Right;
        }
    }

    return null;
}
/// <summary>
/// Lazily enumerates the nodes of the tree in pre-order (a node, then its left
/// subtree, then its right subtree), skipping nodes flagged as deleted.
/// Yields nothing when Count is not positive.
/// </summary>
/// <returns>The non-deleted nodes of the tree.</returns>
public IEnumerable<DeBruijnNode> GetNodes()
{
    if (Count <= 0)
    {
        yield break;
    }

    // Pre-size the stack to log2(Count) entries.
    int initialCapacity = (int)Math.Log(Count, 2.0);
    Stack<DeBruijnNode> pending = new Stack<DeBruijnNode>(initialCapacity);
    pending.Push(_root);

    while (pending.Count > 0)
    {
        DeBruijnNode current = pending.Pop();
        if (current == null)
        {
            // Placeholder pushed for a missing child.
            continue;
        }

        // Right goes in first so that the left child is popped first.
        pending.Push(current.Right);
        pending.Push(current.Left);

        if (current.IsDeleted)
        {
            continue;
        }

        yield return current;
    }
}
/// <summary>
/// Gets the nodes present in this graph.
/// Nodes are enumerated lazily in pre-order (a node, then its left subtree, then its
/// right subtree); nodes marked as deleted are not returned.
/// </summary>
/// <returns>The non-deleted nodes of the graph.</returns>
public IEnumerable<DeBruijnNode> GetNodes()
{
    Stack<DeBruijnNode> pending = new Stack<DeBruijnNode>();

    DeBruijnNode start = Root;
    if (start != null)
    {
        pending.Push(start);
    }

    while (pending.Count > 0)
    {
        DeBruijnNode current = pending.Pop();

        // Children are read before yielding. Only real children are pushed, so the
        // stack never carries null placeholders that would be popped and discarded.
        DeBruijnNode right = current.Right;
        DeBruijnNode left = current.Left;

        // Right goes in first so that the left child is popped first.
        if (right != null)
        {
            pending.Push(right);
        }

        if (left != null)
        {
            pending.Push(left);
        }

        if (!current.IsDeleted)
        {
            yield return current;
        }
    }

    // No TrimExcess here: the stack is a local about to go out of scope, so
    // shrinking its backing array would be wasted work.
}
/// <summary>
/// Gets the next symbol from a node while forming chains.
/// Chains are extended from either the last base of the node's k-mer, taken as is,
/// or from the first base, taken as its complement. This can be made a lot more
/// efficient if it turns into a bottleneck.
/// </summary>
/// <param name="node">Node to read the symbol from.</param>
/// <param name="kmerLength">K-mer length used to decode the node's symbols.</param>
/// <param name="GetRCofFirstBaseInsteadOfLastBase">
/// False to return the last base unchanged; true to return the complement of the first base.
/// </param>
/// <returns>The selected symbol.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown when the DNA alphabet has no complement for the first base.
/// </exception>
private static byte GetNextSymbol(DeBruijnNode node, int kmerLength, bool GetRCofFirstBaseInsteadOfLastBase)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    byte[] symbols = node.GetOriginalSymbols(kmerLength);
    if (!GetRCofFirstBaseInsteadOfLastBase)
    {
        return symbols.Last();
    }

    byte complement;
    if (!DnaAlphabet.Instance.TryGetComplementSymbol(symbols.First(), out complement))
    {
        // Should never happen for symbols that came out of a k-mer. A specific
        // exception type is thrown instead of the bare Exception base class;
        // handlers that catch Exception still see it.
        throw new InvalidOperationException("Could not revcomp base during graph construction");
    }

    return complement;
}
/// <summary>
/// Builds a contig graph from the k-mer graph.
/// One graph node is created per contig (in parallel), adjacency between the contig
/// nodes is generated, and finally the graph's base sequences and node set are
/// replaced by the contigs and the contig nodes.
/// </summary>
/// <param name="contigs">List of contig data.</param>
/// <param name="kmerLength">K-mer length.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> is not positive.</exception>
public void BuildContigGraph(IList<ISequence> contigs, int kmerLength)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    // One node per contig; each node is built from the contig's length and its index.
    DeBruijnNode[] contigNodes = new DeBruijnNode[contigs.Count];
    Parallel.For(0, contigs.Count, ndx =>
    {
        contigNodes[ndx] = new DeBruijnNode(contigs[ndx].Count, ndx);
    });

    GenerateContigAdjacency(contigs, kmerLength, contigNodes);

    // Swap the graph contents over to the contig view.
    _baseSequence = new List<ISequence>(contigs);
    _kmerNodes = new HashSet<DeBruijnNode>(contigNodes);
}
/// <summary>
/// Marks as invalid the first right extension slot (0 to 3) that refers to the given node.
/// </summary>
/// <param name="node">DeBruijn node which matches one of the right extensions of the current node.</param>
/// <returns>True when a slot matched and was marked; false when no slot refers to the node.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public bool MarkRightExtensionAsInvalid(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    if (RightExtension0 == node)
    {
        InvalidRightExtension0 = true;
        return true;
    }

    if (RightExtension1 == node)
    {
        InvalidRightExtension1 = true;
        return true;
    }

    if (RightExtension2 == node)
    {
        InvalidRightExtension2 = true;
        return true;
    }

    if (RightExtension3 == node)
    {
        InvalidRightExtension3 = true;
        return true;
    }

    return false;
}
/// <summary>
/// Permanently removes every extension that has been flagged as invalid.
/// Each occupied slot whose invalid flag is set is cleared, and the matching
/// extension counter is decremented.
/// </summary>
/// <remarks>
/// Only the counter decrement runs under lock(this); the slot test and the
/// slot reset are outside the lock.
/// </remarks>
public void PurgeInvalidExtensions()
{
    // Right extensions.
    if (RightExtension0 != null && InvalidRightExtension0)
    {
        RightExtension0 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension1 != null && InvalidRightExtension1)
    {
        RightExtension1 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension2 != null && InvalidRightExtension2)
    {
        RightExtension2 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension3 != null && InvalidRightExtension3)
    {
        RightExtension3 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    // Left extensions.
    if (LeftExtension0 != null && InvalidLeftExtension0)
    {
        LeftExtension0 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension1 != null && InvalidLeftExtension1)
    {
        LeftExtension1 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension2 != null && InvalidLeftExtension2)
    {
        LeftExtension2 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension3 != null && InvalidLeftExtension3)
    {
        LeftExtension3 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }
}
/// <summary>
/// Traces the simple path that starts at 'node' and runs in the given direction.
/// The start node is marked visited and the path is followed from its first right
/// (forward) or first left (backward) extension. What happens to the traced path
/// depends on the coverage threshold: when a threshold is set (not -1), the path's
/// nodes are marked for deletion if their average k-mer count is below it, and no
/// contig is emitted; when no threshold is set, the contig sequence is appended to
/// <paramref name="assembledContigs"/> if contig sequences were requested.
/// </summary>
/// <param name="assembledContigs">List of assembled contigs; locked while a contig is added.</param>
/// <param name="node">Starting node of the contig path.</param>
/// <param name="isForwardDirection">Boolean indicating direction of path.</param>
/// <param name="createContigSequences">Boolean indicating whether the contig sequences are to be created or not.</param>
/// <param name="DuplicatesPossible">
/// True if both the forward and the reverse path could be generated. In that case the
/// path is only processed when its first node's value compares greater than or equal
/// to its last node's value.
/// </param>
private void TraceSimplePath(List<ISequence> assembledContigs, DeBruijnNode node, bool isForwardDirection, bool createContigSequences, bool DuplicatesPossible)
{
    ISequence nodeSequence = _graph.GetNodeSequence(node);
    List<byte> contigSequence = new List<byte>(nodeSequence);
    node.IsVisited = true;

    List<DeBruijnNode> contigPath = new List<DeBruijnNode>();
    contigPath.Add(node);

    // Pick the first link leaving the start node in the requested direction.
    KeyValuePair<DeBruijnNode, bool> firstLink;
    if (isForwardDirection)
    {
        firstLink = node.GetRightExtensionNodesWithOrientation().First();
    }
    else
    {
        firstLink = node.GetLeftExtensionNodesWithOrientation().First();
    }

    TraceSimplePathLinks(contigPath, contigSequence, isForwardDirection, firstLink.Value, firstLink.Key, createContigSequences);

    // Duplicate check: drop the path when duplicates are possible and the first
    // node's value compares below the last node's value.
    if (DuplicatesPossible && contigPath[0].NodeValue.CompareTo(contigPath.Last().NodeValue) < 0)
    {
        return;
    }

    if (_coverageThreshold != -1)
    {
        // Definition from Velvet Manual: http://helix.nih.gov/Applications/velvet_manual.pdf
        // "k-mer coverage" is how many times a k-mer has been seen among the reads.
        double coverage = contigPath.Average(n => n.KmerCount);
        if (coverage < _coverageThreshold)
        {
            foreach (DeBruijnNode pathNode in contigPath)
            {
                pathNode.MarkNodeForDelete();
            }
        }

        return;
    }

    if (!createContigSequences)
    {
        return;
    }

    lock (assembledContigs)
    {
        assembledContigs.Add(new Sequence(nodeSequence.Alphabet, contigSequence.ToArray()));
    }
}
/// <summary>
/// Drops every extension whose target node is marked for deletion.
/// Each occupied slot pointing at such a node is cleared and the matching
/// extension counter is decremented. Does nothing when this node itself is
/// marked for deletion.
/// </summary>
/// <remarks>
/// Only the counter decrement runs under lock(this); the slot test and the
/// slot reset are outside the lock.
/// </remarks>
public void RemoveMarkedExtensions()
{
    // A node that is going away itself needs no update.
    if (IsMarkedForDelete)
    {
        return;
    }

    // Right extensions.
    if (RightExtension0 != null && RightExtension0.IsMarkedForDelete)
    {
        RightExtension0 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension1 != null && RightExtension1.IsMarkedForDelete)
    {
        RightExtension1 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension2 != null && RightExtension2.IsMarkedForDelete)
    {
        RightExtension2 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    if (RightExtension3 != null && RightExtension3.IsMarkedForDelete)
    {
        RightExtension3 = null;
        lock (this)
        {
            RightExtensionNodesCount--;
        }
    }

    // Left extensions.
    if (LeftExtension0 != null && LeftExtension0.IsMarkedForDelete)
    {
        LeftExtension0 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension1 != null && LeftExtension1.IsMarkedForDelete)
    {
        LeftExtension1 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension2 != null && LeftExtension2.IsMarkedForDelete)
    {
        LeftExtension2 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }

    if (LeftExtension3 != null && LeftExtension3.IsMarkedForDelete)
    {
        LeftExtension3 = null;
        lock (this)
        {
            LeftExtensionNodesCount--;
        }
    }
}
/// <summary>
/// Validates the DeBruijnNode constructor: builds a node from the first k-mer of the
/// input reads, attaches one left and one right extension built from the second
/// k-mer, and compares the node's counters with the expected values from the test XML.
/// </summary>
/// <param name="nodeName">XML node name used for the different test cases.</param>
internal void ValidateDeBruijnNodeCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodeExtensionsCountNode);
    string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmersCountNode);
    string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LeftNodeExtensionsCountNode);
    string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RightNodeExtensionsCountNode);

    // Get the input reads and build kmers.
    using (FastAParser parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Build the kmers using this.
        this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
            (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

        // Node under test, built from the first k-mer of the first read.
        ISequence seq = this.SequenceReads.First();
        KmerData32 kmerData = new KmerData32();
        kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
        DeBruijnNode node = new DeBruijnNode(kmerData, 1);

        // Left and right neighbours, both built from the second k-mer.
        kmerData = new KmerData32();
        kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
        DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
        DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

        node.SetExtensionNode(false, true, leftnode);
        node.SetExtensionNode(true, true, rightnode);

        // Validate DeBruijnNode class properties. Each property is asserted once;
        // the original repeated the left/right count assertions verbatim.
        Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
        Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
        Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
        Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));
    }

    ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
}
/// <summary>
/// Build graph nodes and edges from the k-mers of the input sequences.
/// The calling thread converts each sequence into k-mer nodes and queues them; a
/// background task takes the queued nodes and inserts them into the binary tree,
/// incrementing the k-mer count of an existing node when the same k-mer is seen
/// again. Sequences containing ambiguous or gap symbols are skipped. Once all
/// nodes are inserted, the links between nodes are generated.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the k-mer length is not positive.</exception>
public void Build(IEnumerable<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (this.kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>();

    // Consumer: inserts queued nodes into the binary tree.
    Task buildKmers = Task.Factory.StartNew(() =>
    {
        while (!kmerDataCollection.IsCompleted)
        {
            DeBruijnNode newNode = null;
            if (kmerDataCollection.TryTake(out newNode, -1))
            {
                // First element being added becomes the root of the tree.
                if (this.root == null)
                {
                    this.root = newNode;
                    this.NodeCount++;
                    continue;
                }

                int result = 0;
                DeBruijnNode temp = this.root;
                DeBruijnNode parent = this.root;

                // Search the tree for the place where the new node should be inserted.
                while (temp != null)
                {
                    result = newNode.NodeValue.CompareTo(temp.NodeValue);
                    if (result == 0)
                    {
                        // K-mer already present: count it, capped by the 255 check.
                        if (temp.KmerCount <= 255)
                        {
                            temp.KmerCount++;
                        }

                        // Always leave the search once the k-mer is found. The break
                        // used to sit inside the cap check, so a node whose count had
                        // passed the cap made this loop spin forever.
                        break;
                    }
                    else if (result > 0)
                    {
                        // Move to right sub-tree.
                        parent = temp;
                        temp = temp.Right;
                    }
                    else if (result < 0)
                    {
                        // Move to left sub-tree.
                        parent = temp;
                        temp = temp.Left;
                    }
                }

                // Position found; result is 0 when the k-mer already existed.
                if (result > 0)
                {
                    parent.Right = newNode;
                    NodeCount++;
                }
                else if (result < 0)
                {
                    parent.Left = newNode;
                    NodeCount++;
                }
            }
        }
    });

    // Producer: runs on the calling thread. The finally block guarantees the
    // consumer is released even when producing fails (for example when
    // 'sequences' is empty and First() throws); otherwise the consumer task
    // would stay blocked in TryTake forever.
    try
    {
        IAlphabet alphabet = sequences.First().Alphabet;
        byte[] symbolMap = alphabet.GetSymbolValueMap();
        HashSet<byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
        HashSet<byte> gapSymbols;
        alphabet.TryGetGapSymbols(out gapSymbols);

        // Generate the kmers from the sequences.
        foreach (ISequence sequence in sequences)
        {
            // While more than 2 million nodes are queued, wait in 5 ms steps so that
            // the consumer can drain the queue. This avoids OutOfMemoryException.
            while (kmerDataCollection.Count > 2000000)
            {
                System.Threading.Thread.Sleep(5);
            }

            long count = sequence.Count;
            byte[] convertedSymbols = new byte[count];
            bool skipSequence = false;

            for (long index = 0; index < count; index++)
            {
                convertedSymbols[index] = symbolMap[sequence[index]];

                // gapSymbols is checked for null in case TryGetGapSymbols left it unset.
                if (ambiguousSymbols.Contains(convertedSymbols[index])
                    || (gapSymbols != null && gapSymbols.Contains(convertedSymbols[index])))
                {
                    skipSequence = true;
                    break;
                }
            }

            if (skipSequence)
            {
                continue;
            }

            Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

            // Generate the kmers from each sequence.
            for (long i = 0; i <= count - this.kmerLength; ++i)
            {
                IKmerData kmerData = this.GetNewKmerData();
                bool orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
            }
        }
    }
    finally
    {
        kmerDataCollection.CompleteAdding();
    }

    Task.WaitAll(buildKmers);

    kmerDataCollection.Dispose();

    // Generate the links.
    this.GenerateLinks();
}
/// <summary>
/// Validates adding a right extension to a DeBruijnNode: a node is built from the
/// first k-mer of the input reads, a node built from the second k-mer is attached
/// as its right extension, and the right extension count is checked.
/// </summary>
/// <param name="nodeName">XML node name used for the different test cases.</param>
internal void ValidateDeBruijnNodeAddRightExtension(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    using (FastAParser parser = new FastAParser(filePath))
    {
        // Read the input sequences.
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Build kmers from step1.
        this.KmerLength = int.Parse(kmerLength, null);
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());

        SequenceToKmerBuilder kmerBuilder = new SequenceToKmerBuilder();
        IList<KmersOfSequence> lstKmers =
            new List<KmersOfSequence>(kmerBuilder.Build(this.SequenceReads, this.KmerLength));

        ISequence seq = this.SequenceReads.First();

        // Node under test, from the first k-mer.
        KmerData32 nodeKmer = new KmerData32();
        nodeKmer.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
        DeBruijnNode node = new DeBruijnNode(nodeKmer, 1);

        // Right neighbour, from the second k-mer.
        KmerData32 rightKmer = new KmerData32();
        rightKmer.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
        DeBruijnNode rightNode = new DeBruijnNode(rightKmer, 1);

        node.SetExtensionNode(true, true, rightNode);

        Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.RightExtensionNodesCount);
    }

    ApplicationLog.WriteLine(@"Padena BVT :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validates RemoveExtensionThreadSafe() of DeBruijnNode: a node is built from the
/// first k-mer of the input reads, one left and one right extension built from the
/// second k-mer are attached, both are removed again, and the extension counts are
/// checked before and after.
/// </summary>
/// <param name="nodeName">XML node name used for the different test cases.</param>
internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The parser is closed in a finally block so that it is
    // released even when parsing throws.
    IEnumerable<ISequence> sequenceReads = null;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        parser.Close();
    }

    // Build kmers from step1.
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Node under test, built from the first k-mer of the first read.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode node = new DeBruijnNode(kmerData, 1);

    // Left and right neighbours, both built from the second k-mer.
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);
    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

    node.SetExtensionNode(false, true, leftnode);
    node.SetExtensionNode(true, true, rightnode);

    // Validates count before removing right and left extension nodes.
    Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.LeftExtensionNodesCount);

    // Remove right and left extension nodes.
    node.RemoveExtensionThreadSafe(rightnode);
    node.RemoveExtensionThreadSafe(leftnode);

    // Validate node after removing right and left extensions.
    Assert.AreEqual(0, node.RightExtensionNodesCount);
    Assert.AreEqual(0, node.LeftExtensionNodesCount);

    // The log message now names the method this test actually validates.
    ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode RemoveExtension() validation for Padena step2 completed successfully");
}
/// <summary>
/// Removes the extension that refers to the given node.
/// The right extension slots are searched first and then the left ones, each under
/// its own lock(this) section; the first matching slot is cleared, its counter is
/// decremented, and the method returns.
/// </summary>
/// <param name="node">Node for which the extension is to be removed.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void RemoveExtensionThreadSafe(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // Right extension slots.
    lock (this)
    {
        if (RightExtension0 == node)
        {
            RightExtension0 = null;
            RightExtensionNodesCount--;
            return;
        }

        if (RightExtension1 == node)
        {
            RightExtension1 = null;
            RightExtensionNodesCount--;
            return;
        }

        if (RightExtension2 == node)
        {
            RightExtension2 = null;
            RightExtensionNodesCount--;
            return;
        }

        if (RightExtension3 == node)
        {
            RightExtension3 = null;
            RightExtensionNodesCount--;
            return;
        }
    }

    // Left extension slots; only reached when no right slot matched.
    lock (this)
    {
        if (LeftExtension0 == node)
        {
            LeftExtension0 = null;
            LeftExtensionNodesCount--;
            return;
        }

        if (LeftExtension1 == node)
        {
            LeftExtension1 = null;
            LeftExtensionNodesCount--;
            return;
        }

        if (LeftExtension2 == node)
        {
            LeftExtension2 = null;
            LeftExtensionNodesCount--;
            return;
        }

        if (LeftExtension3 == node)
        {
            LeftExtension3 = null;
            LeftExtensionNodesCount--;
            return;
        }
    }
}
/// <summary>
/// Builds the graph from the given sequences.
/// A producer task converts the sequences into blocks of k-mers and queues them;
/// the queued blocks are consumed in parallel, creating or looking up a node per
/// k-mer and incrementing its count. Sequences that are not DNA or that contain gap
/// symbols are skipped and counted. Afterwards the nodes are copied into an array,
/// the links between them are generated and their tree pointers are cleared.
/// </summary>
/// <param name="sequences">Input sequences.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the k-mer length exceeds KmerData32.MAX_KMER_LENGTH.</exception>
public void Build(IEnumerable<ISequence> sequences)
{
    // Size of Kmer List to grab, somewhat arbitrary but want to keep list size below
    // large object threshold, which is ~85 kb
    const int blockSize = 4096;

    // When to add list to blocking collection, most short reads are <=151 bp so this
    // should avoid needing to grow the list
    const int addThreshold = blockSize - 151;

    // When to pause adding
    const int stopAddThreshold = 2000000 / blockSize;

    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (KmerLength > KmerData32.MAX_KMER_LENGTH)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan31);
    }

    // A dictionary of kmers to de Bruijn nodes
    KmerDictionary kmerManager = new KmerDictionary();

    // Create the producer thread.
    var kmerDataCollection = new BlockingCollection<List<KmerData32>>();
    Task producer = Task.Factory.StartNew(() =>
    {
        try
        {
            List<KmerData32> kmerList = new List<KmerData32>(blockSize);

            IAlphabet alphabet = Alphabets.DNA;
            HashSet<byte> gapSymbols;
            alphabet.TryGetGapSymbols(out gapSymbols);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
            {
                // Sequences that are not DNA are ignored.
                bool skipSequence = false;
                if (sequence.Alphabet != Alphabets.DNA)
                {
                    skipSequence = true;
                }
                else
                {
                    // Sequences containing any gap symbol are ignored. One pass over
                    // the sequence with a set lookup replaces one full pass per gap symbol.
                    for (long index = 0; index < sequence.Count; ++index)
                    {
                        if (gapSymbols.Contains(sequence[index]))
                        {
                            skipSequence = true;
                            break;
                        }
                    }
                }

                if (skipSequence)
                {
                    Interlocked.Increment(ref _skippedSequencesCount);
                    Interlocked.Increment(ref _processedSequencesCount);
                    continue;
                }

                // While the queue holds more than stopAddThreshold blocks (about
                // 2 million kmers), wait in 2 second steps so that the consumers can
                // drain it. This avoids OutOfMemoryException.
                while (kmerDataCollection.Count > stopAddThreshold)
                {
                    Task.Delay(TimeSpan.FromSeconds(2)).Wait();
                }

                // Convert sequences to k-mers
                kmerList.AddRange(KmerData32.GetKmers(sequence, KmerLength));

                // Most reads are <=150 basepairs, so handing the list over once it
                // passes addThreshold keeps it below blockSize without regrowing.
                if (kmerList.Count > addThreshold)
                {
                    kmerDataCollection.Add(kmerList);

                    // Same capacity as the first list (the original used a stray 4092).
                    kmerList = new List<KmerData32>(blockSize);
                }

                Interlocked.Increment(ref _processedSequencesCount);
            }

            // Hand over the remainder. The original test (Count <= addThreshold) was
            // always true at this point and also queued empty lists.
            if (kmerList.Count > 0)
            {
                kmerDataCollection.Add(kmerList);
            }
        }
        finally
        {
            // Always release the consumers, even when producing failed.
            kmerDataCollection.CompleteAdding();
        }
    });

    // Consume k-mers by adding them to the dictionary as nodes
    Parallel.ForEach(kmerDataCollection.GetConsumingEnumerable(), newKmerList =>
    {
        foreach (KmerData32 newKmer in newKmerList)
        {
            // Create Vertex
            DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);

            // Need to lock node if doing this in parallel.
            // NOTE(review): the cap check runs outside the lock, so concurrent
            // increments can pass it together - confirm this is acceptable.
            if (node.KmerCount <= 255)
            {
                lock (node)
                {
                    node.KmerCount++;
                }
            }
        }
    });

    // Ensure producer exceptions are handled.
    producer.Wait();

    // Done filling the dictionary
    kmerDataCollection.Dispose();

    // NOTE: To speed enumeration make the nodes into an array and dispose of the collection
    _nodeCount = kmerManager.NodeCount;
    _nodes = kmerManager.GenerateNodeArray();

    // Generate the links
    GenerateLinks(kmerManager);

    // Since we no longer need to search for values set left and right nodes of child array to null
    // so that they are available for GC if no longer needed
    foreach (DeBruijnNode node in _nodes)
    {
        node.Left = node.Right = null;
    }

    GraphBuildCompleted = true;
}
/// <summary>
/// Appends 'nextNode' to the contig path unless the path already contains it.
/// When the node is appended and contig sequences are being created, the symbol
/// obtained from the graph for that node is added to the end of the contig sequence
/// (forward direction) or inserted at its start (reverse direction).
/// </summary>
/// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
/// <param name="contigSequence">Sequence of contig being assembled.</param>
/// <param name="nextNode">Next node on the path to be added.</param>
/// <param name="isForwardDirection">Boolean indicating direction.</param>
/// <param name="isSameOrientation">Boolean indicating orientation.</param>
/// <param name="createContigSequences">Boolean indicating whether contig sequences are to be created or not.</param>
/// <returns>True when the node was appended; false when it was already on the path (a loop).</returns>
private bool CheckAndAddNode(
    List<DeBruijnNode> contigPath,
    List<byte> contigSequence,
    DeBruijnNode nextNode,
    bool isForwardDirection,
    bool isSameOrientation,
    bool createContigSequences)
{
    // An empty path never contains the node, so a plain membership test covers both cases.
    if (contigPath.Contains(nextNode))
    {
        // The node is already on the path: there is a loop, nothing is updated.
        return false;
    }

    contigPath.Add(nextNode);

    if (!createContigSequences)
    {
        return true;
    }

    // Extend the contig sequence with the symbol contributed by the new node.
    byte nextSymbol = _graph.GetNextSymbolFrom(nextNode, isForwardDirection, isSameOrientation);
    int insertAt = isForwardDirection ? contigSequence.Count : 0;
    contigSequence.Insert(insertAt, nextSymbol);

    return true;
}
/// <summary>
/// Trace simple path in specified direction.
/// Starting at 'node', each visited node is marked as visited and appended to the path;
/// the walk then moves to the first extension in the direction of travel. It stops when
/// a node has no extension in that direction or when the next node is already on the path.
/// </summary>
/// <param name="contigPath">List of graph nodes corresponding to contig path.</param>
/// <param name="contigSequence">Sequence of contig being assembled.</param>
/// <param name="isForwardDirection">Boolean indicating direction of path.</param>
/// <param name="sameOrientation">Path orientation.</param>
/// <param name="node">Next node on the path.</param>
/// <param name="createContigSequences">Indicates whether the contig sequences are to be created or not.</param>
private void TraceSimplePathLinks(
    List<DeBruijnNode> contigPath,
    List<byte> contigSequence,
    bool isForwardDirection,
    bool sameOrientation,
    DeBruijnNode node,
    bool createContigSequences)
{
    bool endFound = false;
    while (!endFound)
    {
        node.IsVisited = true;

        // Get extensions going in same directions.
        Dictionary<DeBruijnNode, bool> sameDirectionExtensions = (isForwardDirection ^ sameOrientation)
            ? node.GetLeftExtensionNodesWithOrientation()
            : node.GetRightExtensionNodesWithOrientation();

        if (sameDirectionExtensions.Count == 0)
        {
            // Found end of path. Add this and return
            CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences);
            endFound = true;
        }
        else
        {
            var sameDirectionExtension = sameDirectionExtensions.First();

            // Continue traceback in the same direction. Add this node to list and continue.
            if (!CheckAndAddNode(contigPath, contigSequence, node, isForwardDirection, sameOrientation, createContigSequences))
            {
                // Loop is found: the node is already on the path, so the simple path cannot be
                // extended further. With ambiguous extensions trimmed this is expected to happen
                // only when the walk returns to the first node (a circle in the graph), but the
                // walk has to stop in every case. 'node' is not advanced on this branch, so
                // leaving the flag unset would test the same node again forever.
                endFound = true;
            }
            else
            {
                node = sameDirectionExtension.Key;
                sameOrientation = !(sameOrientation ^ sameDirectionExtension.Value);
            }
        }
    }
}
/// <summary>
/// Adds the links between the nodes of the graph.
/// For every node, each DNA symbol is appended (right extensions) or prepended
/// (left extensions) to the overlapping part of the node's k-mer and the result is
/// looked up in the tree. When that lookup fails, the corresponding reverse-complement
/// k-mer is looked up instead and, if found, linked with the inverted orientation flag.
/// Nodes are processed in parallel.
/// </summary>
private void GenerateLinks()
{
    Parallel.ForEach(
        this.GetNodes(),
        node =>
        {
            // Scratch k-mer that is re-filled for every lookup made for this node.
            IKmerData probe = GetNewKmerData();
            DeBruijnNode match = null;

            // Decode the stored k-mer and its reverse complement, then assign them
            // according to the orientation in which the node data is stored.
            bool storedInForwardOrientation = node.NodeDataOrientation;
            string storedKmer = Encoding.Default.GetString(node.NodeValue.GetKmerData(this.kmerLength));
            string storedKmerComplement = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(this.KmerLength));

            string kmerString = storedInForwardOrientation ? storedKmer : storedKmerComplement;
            string kmerStringRC = storedInForwardOrientation ? storedKmerComplement : storedKmer;

            // Right Extensions
            string overlap = kmerString.Substring(1);
            string overlapRC = kmerStringRC.Substring(0, kmerLength - 1);
            for (int symbolIndex = 0; symbolIndex < DnaSymbols.Length; symbolIndex++)
            {
                string candidate = overlap + DnaSymbols[symbolIndex];
                probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                match = this.SearchTree(probe);
                if (match != null)
                {
                    node.SetExtensionNodes(true, match.NodeDataOrientation, match);
                    continue;
                }

                // Not found as-is: try the reverse complement of the candidate.
                string candidateRC = DnaSymbolsComplement[symbolIndex] + overlapRC;
                probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                match = this.SearchTree(probe);
                if (match != null)
                {
                    node.SetExtensionNodes(true, !match.NodeDataOrientation, match);
                }
            }

            // Left Extensions
            overlap = kmerString.Substring(0, kmerLength - 1);
            overlapRC = kmerStringRC.Substring(1);
            for (int symbolIndex = 0; symbolIndex < DnaSymbols.Length; symbolIndex++)
            {
                string candidate = DnaSymbols[symbolIndex] + overlap;
                probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                match = this.SearchTree(probe);
                if (match != null)
                {
                    node.SetExtensionNodes(false, match.NodeDataOrientation, match);
                    continue;
                }

                // Not found as-is: try the reverse complement of the candidate.
                string candidateRC = overlapRC + DnaSymbolsComplement[symbolIndex];
                probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                match = this.SearchTree(probe);
                if (match != null)
                {
                    node.SetExtensionNodes(false, !match.NodeDataOrientation, match);
                }
            }
        });
}
/// <summary>
/// Stores 'extensionNode' in the first free extension slot of the current node,
/// records the orientation of the connecting edge for that slot and increments
/// the matching extension count. A null extension node is ignored.
/// The whole update runs while holding the lock on this instance.
/// </summary>
/// <param name="isForwardDirection">True indicates Right extension and false indicates left extension.</param>
/// <param name="sameOrientation">Orientation of the connecting edge.</param>
/// <param name="extensionNode">Node to which the extension is to be set.</param>
/// <exception cref="ArgumentException">Thrown when all four slots on the requested side are already in use.</exception>
public void SetExtensionNode(bool isForwardDirection, bool sameOrientation, DeBruijnNode extensionNode)
{
    if (extensionNode == null)
    {
        return;
    }

    lock (this)
    {
        if (!isForwardDirection)
        {
            // Left side: take the first empty slot.
            if (this.LeftExtension0 == null)
            {
                this.LeftExtension0 = extensionNode;
                this.OrientationLeftExtension0 = sameOrientation;
            }
            else if (this.LeftExtension1 == null)
            {
                this.LeftExtension1 = extensionNode;
                this.OrientationLeftExtension1 = sameOrientation;
            }
            else if (this.LeftExtension2 == null)
            {
                this.LeftExtension2 = extensionNode;
                this.OrientationLeftExtension2 = sameOrientation;
            }
            else if (this.LeftExtension3 == null)
            {
                this.LeftExtension3 = extensionNode;
                this.OrientationLeftExtension3 = sameOrientation;
            }
            else
            {
                throw new ArgumentException("Can't set more than four extensions.");
            }

            this.LeftExtensionNodesCount += 1;
            return;
        }

        // Right side: take the first empty slot.
        if (this.RightExtension0 == null)
        {
            this.RightExtension0 = extensionNode;
            this.OrientationRightExtension0 = sameOrientation;
        }
        else if (this.RightExtension1 == null)
        {
            this.RightExtension1 = extensionNode;
            this.OrientationRightExtension1 = sameOrientation;
        }
        else if (this.RightExtension2 == null)
        {
            this.RightExtension2 = extensionNode;
            this.OrientationRightExtension2 = sameOrientation;
        }
        else if (this.RightExtension3 == null)
        {
            this.RightExtension3 = extensionNode;
            this.OrientationRightExtension3 = sameOrientation;
        }
        else
        {
            throw new ArgumentException("Can't set more than four extensions.");
        }

        this.RightExtensionNodesCount += 1;
    }
}
/// <summary>
/// Initializes a new instance of the DeBruijnPath class whose node list
/// contains the specified node as its only element.
/// </summary>
/// <param name="node">Graph node.</param>
public DeBruijnPath(DeBruijnNode node)
{
    List<DeBruijnNode> initialNodes = new List<DeBruijnNode>();
    initialNodes.Add(node);
    this.path = initialNodes;
}
/// <summary>
/// Validate the DeBruijnNode ctor by passing the kmer and validating
/// the node object: two nodes are created from the first k-mers of the input reads,
/// one is set as left extension of the other, and the left extension count is asserted.
/// </summary>
/// <param name="nodeName">xml node name used for different testcases</param>
internal void ValidateDeBruijnNodeCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads and build kmers
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        // Materialize the reads while the parser is still open.
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Close the parser even when parsing fails so the file is not left open.
        parser.Close();
    }

    // Build the kmers using assembler
    this.KmerLength = int.Parse(kmerLength, null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Validate the node creation
    // Create node and add left node.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

    DeBruijnNode node = new DeBruijnNode(kmerData, 1);
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    node.SetExtensionNode(false, true, leftnode);

    Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.LeftExtensionNodesCount);

    ApplicationLog.WriteLine(
        "Padena BVT : DeBruijnNode ctor() validation for Padena step2 completed successfully");
}
/// <summary>
/// Stores 'extensionNode' in the first free extension slot of the current node,
/// records the orientation of the connecting edge for that slot and increments
/// the matching extension count. A null extension node is ignored.
/// The whole update runs while holding the lock on this instance.
/// </summary>
/// <param name="isForwardDirection">True indicates Right extension and false indicates left extension.</param>
/// <param name="sameOrientation">Orientation of the connecting edge.</param>
/// <param name="extensionNode">Node to which the extension is to be set.</param>
/// <exception cref="ArgumentException">Thrown when all four slots on the requested side are already in use.</exception>
public void SetExtensionNode(bool isForwardDirection, bool sameOrientation, DeBruijnNode extensionNode)
{
    if (extensionNode == null)
    {
        return;
    }

    lock (this)
    {
        if (!isForwardDirection)
        {
            // Left side: take the first empty slot.
            if (this.LeftExtension0 == null)
            {
                this.LeftExtension0 = extensionNode;
                this.OrientationLeftExtension0 = sameOrientation;
            }
            else if (this.LeftExtension1 == null)
            {
                this.LeftExtension1 = extensionNode;
                this.OrientationLeftExtension1 = sameOrientation;
            }
            else if (this.LeftExtension2 == null)
            {
                this.LeftExtension2 = extensionNode;
                this.OrientationLeftExtension2 = sameOrientation;
            }
            else if (this.LeftExtension3 == null)
            {
                this.LeftExtension3 = extensionNode;
                this.OrientationLeftExtension3 = sameOrientation;
            }
            else
            {
                throw new ArgumentException("Can't set more than four extensions.");
            }

            this.LeftExtensionNodesCount += 1;
            return;
        }

        // Right side: take the first empty slot.
        if (this.RightExtension0 == null)
        {
            this.RightExtension0 = extensionNode;
            this.OrientationRightExtension0 = sameOrientation;
        }
        else if (this.RightExtension1 == null)
        {
            this.RightExtension1 = extensionNode;
            this.OrientationRightExtension1 = sameOrientation;
        }
        else if (this.RightExtension2 == null)
        {
            this.RightExtension2 = extensionNode;
            this.OrientationRightExtension2 = sameOrientation;
        }
        else if (this.RightExtension3 == null)
        {
            this.RightExtension3 = extensionNode;
            this.OrientationRightExtension3 = sameOrientation;
        }
        else
        {
            throw new ArgumentException("Can't set more than four extensions.");
        }

        this.RightExtensionNodesCount += 1;
    }
}
/// <summary>
/// Compact the node list by removing deleted nodes.
/// Three tasks cooperate: one collects the indexes of deleted nodes among the first
/// _nodeCount entries, one collects the undeleted nodes stored at index _nodeCount and
/// beyond, and one copies each collected node into a collected index. Finally the list
/// is trimmed to _nodeCount entries.
/// </summary>
/// <exception cref="Exception">Thrown when the node collection is null or is not a BigList.</exception>
/// <exception cref="AggregateException">
/// Thrown by the final wait when a task fails, e.g. when the number of deleted front
/// entries does not match the number of undeleted back entries.
/// </exception>
private void CompactDeletedNodesFromBigList()
{
    // NOTE: Same method as CompactDeletedNodesFromList but using long instead of int
    var lnodes = _nodes as BigList<DeBruijnNode>;
    if (lnodes == null)
    {
        throw new Exception("Tried to use node collection as list when it was null or another type");
    }

    using (BlockingCollection<long> deletedFrontIndexes = new BlockingCollection<long>())
    using (BlockingCollection<DeBruijnNode> undeletedBackNodes = new BlockingCollection<DeBruijnNode>())
    {
        // task to find empty spots in top of list
        long emptySpotsFound = 0;
        Task findEmptyFrontIndexes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                for (long curForward = 0; curForward < _nodeCount; curForward++)
                {
                    DeBruijnNode cnode = lnodes[curForward];
                    if (cnode.IsDeleted)
                    {
                        deletedFrontIndexes.Add(curForward);
                        emptySpotsFound++;
                    }
                }
            }
            finally
            {
                // Always signal completion so the mover cannot wait forever if the scan fails.
                deletedFrontIndexes.CompleteAdding();
                Thread.EndCriticalRegion();
            }
        });

        // task to find undeleted nodes in back of list
        long filledSpotsFound = 0;
        Task findFullBackIndexes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                try
                {
                    for (long curBackward = lnodes.Count - 1; curBackward >= _nodeCount; curBackward--)
                    {
                        DeBruijnNode cnode = lnodes[curBackward];
                        if (!cnode.IsDeleted)
                        {
                            undeletedBackNodes.Add(cnode);
                            filledSpotsFound++;
                        }
                    }
                }
                finally
                {
                    undeletedBackNodes.CompleteAdding();
                }

                findEmptyFrontIndexes.Wait();

                // Report a mismatch between the holes in the front and the live nodes in the back.
                if (emptySpotsFound != filledSpotsFound)
                {
                    throw new Exception("The node array in the graph has become corrupted, node count does not match the number of undeleted nodes");
                }
            }
            finally
            {
                Thread.EndCriticalRegion();
            }
        });

        // task to move things that have been found in the back to the front
        Task moveNodes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                // The logic here requires that the items missing in the front match those in the back.
                // Consuming the index collection ends cleanly once it is completed and drained, so no
                // default index/node pair is ever written when there is nothing (left) to move.
                foreach (long index in deletedFrontIndexes.GetConsumingEnumerable())
                {
                    DeBruijnNode tm;
                    if (!undeletedBackNodes.TryTake(out tm, -1))
                    {
                        // No node left to fill this hole; the mismatch is reported by findFullBackIndexes.
                        break;
                    }

                    lnodes[index] = tm;
                }
            }
            finally
            {
                Thread.EndCriticalRegion();
            }
        });

        Task.WaitAll(new Task[] { findEmptyFrontIndexes, findFullBackIndexes, moveNodes });
    }

    // now the tail should only be deleted nodes and nodes that have been copied further up in the list
    lnodes.TrimToSize(_nodeCount);
}
/// <summary>
/// Returns the last symbol (forward direction) or the first symbol (reverse direction)
/// of the node's symbols. The symbols are the node's original symbols when
/// isSameOrientation is true and their reverse complement otherwise.
/// </summary>
/// <param name="node">DeBruijn node.</param>
/// <param name="isForwardDirection">Flag to indicate whether the node is in forward direction or not.</param>
/// <param name="isSameOrientation">Flag to indicate the orientation.</param>
/// <returns>Byte representing the symbol.</returns>
/// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
public byte GetNextSymbolFrom(DeBruijnNode node, bool isForwardDirection, bool isSameOrientation)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    byte[] symbols;
    if (isSameOrientation)
    {
        symbols = node.GetOriginalSymbols(KmerLength);
    }
    else
    {
        symbols = node.GetReverseComplementOfOriginalSymbols(KmerLength);
    }

    if (isForwardDirection)
    {
        return symbols.Last();
    }

    return symbols.First();
}
/// <summary>
/// Removes all the invalid extensions permanently.
/// Every extension slot that is occupied and flagged as invalid is cleared and the
/// matching extension count is decremented.
/// The whole purge runs while holding the lock on this instance, so the slot test,
/// the slot reset and the count update cannot interleave with other updates of this
/// node that take the same lock.
/// </summary>
public void PurgeInvalidExtensions()
{
    lock (this)
    {
        if (this.RightExtension0 != null && this.InvalidRightExtension0)
        {
            this.RightExtension0 = null;
            this.RightExtensionNodesCount--;
        }

        if (this.RightExtension1 != null && this.InvalidRightExtension1)
        {
            this.RightExtension1 = null;
            this.RightExtensionNodesCount--;
        }

        if (this.RightExtension2 != null && this.InvalidRightExtension2)
        {
            this.RightExtension2 = null;
            this.RightExtensionNodesCount--;
        }

        if (this.RightExtension3 != null && this.InvalidRightExtension3)
        {
            this.RightExtension3 = null;
            this.RightExtensionNodesCount--;
        }

        if (this.LeftExtension0 != null && this.InvalidLeftExtension0)
        {
            this.LeftExtension0 = null;
            this.LeftExtensionNodesCount--;
        }

        if (this.LeftExtension1 != null && this.InvalidLeftExtension1)
        {
            this.LeftExtension1 = null;
            this.LeftExtensionNodesCount--;
        }

        if (this.LeftExtension2 != null && this.InvalidLeftExtension2)
        {
            this.LeftExtension2 = null;
            this.LeftExtensionNodesCount--;
        }

        if (this.LeftExtension3 != null && this.InvalidLeftExtension3)
        {
            this.LeftExtension3 = null;
            this.LeftExtensionNodesCount--;
        }
    }
}
/// <summary>
/// Build graph nodes and edges from list of k-mers.
/// Creates a node for every unique k-mer (and reverse-complement)
/// in the read. Then, generates adjacency information between nodes
/// by computing pairs of nodes that have overlapping regions
/// between node sequences.
/// </summary>
/// <param name="sequences">
/// List of input sequences. Sequences whose alphabet is not NoGapDNA, or that are
/// shorter than the k-mer length, are counted as skipped and contribute no k-mers.
/// </param>
/// <param name="destroyKmerManagerAfterwards">
/// MT Assembler specific flag. When true (the default) the k-mer dictionary is dropped
/// and the Left/Right references of every node are cleared once the links are generated;
/// when false the dictionary is kept in KmerManager.
/// </param>
/// <exception cref="ArgumentNullException">Thrown when sequences is null.</exception>
public void Build(IEnumerable<ISequence> sequences, bool destroyKmerManagerAfterwards = true)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    // Build the dictionary of kmers to debruijn nodes
    var kmerManager = new KmerDictionary();
    var kmerDataCollection = new BlockingCollection<List<KmerData32>>();

    // Create the producer task
    Task theProducer = Task.Factory.StartNew(() =>
    {
        Thread.BeginCriticalRegion();
        try
        {
#if DEBUG
            int i = 0;
#endif
            var kmerList = new List<KmerData32>(BlockSize);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
            {
#if DEBUG
                i++;
                if (i % 50000 == 0)
                {
                    // TODO: This is reported each 5 minutes anyway.
                    Console.WriteLine("Parsed: " + i.ToString() + " reads");
                }
#endif

                // Ignore sequences that are not gap-free DNA or are too short to hold a single k-mer.
                if (sequence.Alphabet != Alphabets.NoGapDNA || sequence.Count < _kmerLength)
                {
                    // A skipped sequence still counts as processed.
                    Interlocked.Increment(ref this._skippedSequencesCount);
                    Interlocked.Increment(ref this._processedSequencesCount);
                    continue;
                }

                // If too many k-mer lists are queued, poll every 2 ms until the consumer
                // has removed some of them. This will avoid OutofMemoryException
                while (kmerDataCollection.Count > StopAddThreshold)
                {
                    Thread.Sleep(2);
                }

                // Convert sequences to k-mers
                var kmers = KmerData32.GetKmers(sequence, this.KmerLength);
                kmerList.AddRange(kmers);

                // Most reads are <=150 basepairs, so this should avoid having to grow the list
                // by keeping it below blockSize
                if (kmerList.Count > AddThreshold)
                {
                    kmerDataCollection.Add(kmerList);
                    kmerList = new List<KmerData32>(BlockSize);
                }

                Interlocked.Increment(ref this._processedSequencesCount);
            }

            // Hand over whatever is left in the last, partially filled list.
            if (kmerList.Count <= AddThreshold)
            {
                kmerDataCollection.Add(kmerList);
            }
        }
        finally
        {
            // Always signal completion so the consumer below cannot wait forever.
            kmerDataCollection.CompleteAdding();

            // Leave the critical region exactly once, matching the single Begin call above.
            Thread.EndCriticalRegion();
        }
    });

    // Consume k-mers by adding them to the dictionary as nodes
    Parallel.ForEach(
        kmerDataCollection.GetConsumingEnumerable(),
        new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
        newKmerList =>
        {
            foreach (KmerData32 newKmer in newKmerList)
            {
                // Create Vertex
                DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);
                Debug.Assert(newKmer.KmerData == node.NodeValue.KmerData);
            }
        });

    // Done filling the dictionary
    theProducer.Wait(); // Make sure task is finished - also rethrows any exception here.

    kmerDataCollection.Dispose();

    // NOTE: To speed enumeration make the nodes into an array and dispose of the collection
    this._nodeCount = kmerManager.NodeCount;
    this._nodes = kmerManager.GenerateNodeArray();

    // Generate the links
    this.GenerateLinks(kmerManager);

    if (destroyKmerManagerAfterwards)
    {
        // Since we no longer need to search for values drop the dictionary, also set left and right
        // nodes of child array to null so that they are available for GC if no longer needed
        kmerManager = null;
        foreach (DeBruijnNode node in _nodes)
        {
            node.Left = null;
            node.Right = null;
        }
    }
    else
    {
        KmerManager = kmerManager;
    }

    this.GraphBuildCompleted = true;
}
/// <summary>
/// Starting from a potential end of a dangling link, walks along the extension
/// edges of the graph, adding nodes to the link through CheckAndAddDanglingNode,
/// until the other end of the link, a point of ambiguity, a loop or the length
/// threshold is reached.
/// When erosion is enabled (erodeThreshold is not -1) and an ambiguous node is not
/// marked for deletion, a task that later calls ExtendDanglingLink is queued in
/// danglingLinkExtensionTasks (under a lock on that list) and null is returned.
/// </summary>
/// <param name="isForwardDirection">Boolean indicating direction of dangling link.</param>
/// <param name="link">Dangling Link.</param>
/// <param name="node">Node that is next on the link.</param>
/// <param name="sameOrientation">Orientation of link.</param>
/// <returns>
/// The traced link, or null when the length threshold was exceeded or the extension
/// was deferred to a queued task.
/// </returns>
private DeBruijnPath TraceDanglingExtensionLink(bool isForwardDirection, DeBruijnPath link, DeBruijnNode node, bool sameOrientation)
{
    while (true)
    {
        // Work out which side of the node continues in the direction of travel.
        bool continueOnLeftSide = isForwardDirection ^ sameOrientation;
        int sameDirectionCount = continueOnLeftSide ? node.LeftExtensionNodesCount : node.RightExtensionNodesCount;
        int oppositeDirectionCount = continueOnLeftSide ? node.RightExtensionNodesCount : node.LeftExtensionNodesCount;
        Dictionary<DeBruijnNode, bool> onwardExtensions = continueOnLeftSide
            ? node.GetLeftExtensionNodesWithOrientation()
            : node.GetRightExtensionNodesWithOrientation();

        bool reachedEndPoint;

        if (sameDirectionCount == 0)
        {
            // Found other end of dangling link
            return CheckAndAddDanglingNode(link, node, out reachedEndPoint);
        }

        if (oppositeDirectionCount > 1)
        {
            // Have reached a point of ambiguity. The link is not updated.
            if (this.erodeThreshold == -1 || node.IsMarkedForDelete)
            {
                return link;
            }

            lock (this.danglingLinkExtensionTasks)
            {
                // This task essentially just returns back to this method after other ones are removed
                this.danglingLinkExtensionTasks.Add(new Task<int>(_ => ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, false), TaskCreationOptions.None));
            }

            return null;
        }

        if (sameDirectionCount > 1)
        {
            // Have reached a point of ambiguity. The link is updated before returning.
            link = CheckAndAddDanglingNode(link, node, out reachedEndPoint);

            bool deferExtension = this.erodeThreshold != -1 && reachedEndPoint != true && !node.IsMarkedForDelete;
            if (!deferExtension)
            {
                return link;
            }

            lock (this.danglingLinkExtensionTasks)
            {
                this.danglingLinkExtensionTasks.Add(new Task<int>(_ => ExtendDanglingLink(isForwardDirection, link, node, sameOrientation, true), TaskCreationOptions.None));
            }

            return null;
        }

        // Exactly one extension on each side: add this node and keep tracing.
        link = CheckAndAddDanglingNode(link, node, out reachedEndPoint);
        if (reachedEndPoint)
        {
            // Loop is found or threshold length has been exceeded.
            return link;
        }

        // Step to the single onward extension and update the orientation accordingly.
        KeyValuePair<DeBruijnNode, bool> onward = onwardExtensions.First();
        node = onward.Key;
        sameOrientation = !(sameOrientation ^ onward.Value);
    }
}
/// <summary>
/// Compact the node list by removing deleted nodes.
/// Three tasks cooperate: one collects the indexes of deleted nodes among the first
/// _nodeCount entries, one collects the undeleted nodes stored at index _nodeCount and
/// beyond, and one copies each collected node into a collected index. Finally every
/// entry from index _nodeCount onwards is removed.
/// </summary>
/// <exception cref="NullReferenceException">Thrown when the node collection is null or is not a List.</exception>
/// <exception cref="AggregateException">
/// Thrown by the final wait when a task fails, e.g. when the number of deleted front
/// entries does not match the number of undeleted back entries.
/// </exception>
private void CompactDeletedNodesFromList()
{
    var lnodes = _nodes as List<DeBruijnNode>;
    if (lnodes == null)
    {
        throw new NullReferenceException("Tried to use node collection as list when it was null or another type");
    }

    using (BlockingCollection<int> deletedFrontIndexes = new BlockingCollection<int>())
    using (BlockingCollection<DeBruijnNode> undeletedBackNodes = new BlockingCollection<DeBruijnNode>())
    {
        // Number of surplus entries at the back; both scans stop once they have found this many.
        int spotsToFind = lnodes.Count - (int)_nodeCount;

        // task to find empty spots in top of list
        int emptySpotsFound = 0;
        Task findEmptyFrontIndexes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                for (int curForward = 0; curForward < _nodeCount && emptySpotsFound != spotsToFind; curForward++)
                {
                    DeBruijnNode cnode = lnodes[curForward];
                    if (cnode.IsDeleted)
                    {
                        deletedFrontIndexes.Add(curForward);
                        emptySpotsFound++;
                    }
                }
            }
            finally
            {
                // Always signal completion so the mover cannot wait forever if the scan fails.
                deletedFrontIndexes.CompleteAdding();
                Thread.EndCriticalRegion();
            }
        });

        // task to find undeleted nodes in back of list
        int filledSpotsFound = 0;
        Task findFullBackIndexes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                try
                {
                    for (int curBackward = lnodes.Count - 1; curBackward >= _nodeCount && filledSpotsFound != spotsToFind; curBackward--)
                    {
                        DeBruijnNode cnode = lnodes[curBackward];
                        if (!cnode.IsDeleted)
                        {
                            undeletedBackNodes.Add(cnode);
                            filledSpotsFound++;
                        }
                    }
                }
                finally
                {
                    undeletedBackNodes.CompleteAdding();
                }

                findEmptyFrontIndexes.Wait();

                // Report a mismatch between the holes in the front and the live nodes in the back.
                if (emptySpotsFound != filledSpotsFound)
                {
                    throw new ApplicationException("The node array in the graph has become corrupted, node count does not match the number of undeleted nodes");
                }
            }
            finally
            {
                Thread.EndCriticalRegion();
            }
        });

        // task to move things that have been found in the back to the front
        Task moveNodes = Task.Factory.StartNew(() =>
        {
            Thread.BeginCriticalRegion();
            try
            {
                // The logic here requires that the items missing in the front match those in the back.
                // Consuming the index collection ends cleanly once it is completed and drained, instead
                // of racing an IsCompleted check against a Take() that throws on a completed collection.
                foreach (int index in deletedFrontIndexes.GetConsumingEnumerable())
                {
                    DeBruijnNode tm;
                    if (!undeletedBackNodes.TryTake(out tm, -1))
                    {
                        // No node left to fill this hole; the mismatch is reported by findFullBackIndexes.
                        break;
                    }

                    if (tm == null)
                    {
                        throw new NullReferenceException("Cannot move null node!");
                    }

                    lnodes[index] = tm;
                }
            }
            finally
            {
                Thread.EndCriticalRegion();
            }
        });

        Task.WaitAll(new Task[] { findEmptyFrontIndexes, findFullBackIndexes, moveNodes });
    }

    // now the tail should only be deleted nodes and nodes that have been copied further up in the list
    lnodes.RemoveRange((int)_nodeCount, lnodes.Count - (int)_nodeCount);
}
/// <summary>
/// Checks if 'node' can be added to 'link' without
/// violating any conditions pertaining to dangling links.
/// When erosion is enabled (erodeThreshold is not -1), the link is still empty and
/// the node's k-mer count is below the erode threshold, the node is marked for
/// deletion instead of being added (an already marked node counts as an end point).
/// Otherwise the node is added unless it is already part of the link (loop) or the
/// link has reached the length threshold.
/// </summary>
/// <param name="link">Dangling link.</param>
/// <param name="node">Node to be added.</param>
/// <param name="reachedErrorEndPoint">
/// Set to true when tracing has to stop: the node was already marked for deletion,
/// a loop was found, or the length threshold was reached.
/// </param>
/// <returns>
/// The (possibly updated) dangling link, or null when the link has reached the length threshold.
/// </returns>
private DeBruijnPath CheckAndAddDanglingNode(DeBruijnPath link, DeBruijnNode node, out bool reachedErrorEndPoint)
{
    bool isErosionCandidate = this.erodeThreshold != -1
        && link.PathNodes.Count == 0
        && node.KmerCount < this.erodeThreshold;

    if (isErosionCandidate)
    {
        // A node that is already marked means we have been here before: treat it as an end point.
        reachedErrorEndPoint = node.IsMarkedForDelete;
        if (!reachedErrorEndPoint)
        {
            node.MarkNodeForDelete();
        }

        return link;
    }

    // There is a loop in this link when the node is already part of it. No need to update link.
    reachedErrorEndPoint = link.PathNodes.Contains(node);
    if (reachedErrorEndPoint)
    {
        return link;
    }

    // Length crosses threshold. Not a dangling link.
    reachedErrorEndPoint = link.PathNodes.Count >= LengthThreshold;
    if (reachedErrorEndPoint)
    {
        return null;
    }

    // No error conditions found. Add node to link.
    link.PathNodes.Add(node);
    return link;
}
/// <summary>
/// Drops the edge that links this node to the supplied node.
/// The right extension slots are searched first and, only when none of them
/// matches, the left extension slots. The first matching slot is cleared and
/// the corresponding extension count is decremented.
/// Each search runs while holding the lock on this instance.
/// </summary>
/// <param name="node">Node for which extension is to be removed.</param>
/// <exception cref="ArgumentNullException">Thrown when node is null.</exception>
public void RemoveExtensionThreadSafe(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    bool clearedRightSlot = true;
    lock (this)
    {
        if (this.RightExtension0 == node)
        {
            this.RightExtension0 = null;
        }
        else if (this.RightExtension1 == node)
        {
            this.RightExtension1 = null;
        }
        else if (this.RightExtension2 == node)
        {
            this.RightExtension2 = null;
        }
        else if (this.RightExtension3 == node)
        {
            this.RightExtension3 = null;
        }
        else
        {
            clearedRightSlot = false;
        }

        if (clearedRightSlot)
        {
            this.RightExtensionNodesCount--;
        }
    }

    if (clearedRightSlot)
    {
        // A node is removed from one side only; right takes precedence.
        return;
    }

    lock (this)
    {
        bool clearedLeftSlot = true;
        if (this.LeftExtension0 == node)
        {
            this.LeftExtension0 = null;
        }
        else if (this.LeftExtension1 == node)
        {
            this.LeftExtension1 = null;
        }
        else if (this.LeftExtension2 == node)
        {
            this.LeftExtension2 = null;
        }
        else if (this.LeftExtension3 == node)
        {
            this.LeftExtension3 = null;
        }
        else
        {
            clearedLeftSlot = false;
        }

        if (clearedLeftSlot)
        {
            this.LeftExtensionNodesCount--;
        }
    }
}
/// <summary>
/// Try and extend dangling links following
/// graph clean-up after erosion.
/// A non-empty link is extended from 'node' in the given direction and orientation.
/// An empty link is restarted from 'node' when the node has no right extensions
/// (traced in reverse direction) or no left extensions (traced in forward direction).
/// </summary>
/// <param name="isForwardDirection">Boolean indicating direction of dangling link.</param>
/// <param name="danglingLink">Dangling Link.</param>
/// <param name="node">Node that is next on the link.</param>
/// <param name="sameOrientation">Orientation of link.</param>
/// <param name="removeLast">Boolean indicating if 'node' has to be removed
/// from the link before extending.</param>
/// <returns>
/// Length of dangling link found after extension; 0 when the node is not an end-point
/// of an empty link or when tracing returned no link.
/// </returns>
private int ExtendDanglingLink(bool isForwardDirection, DeBruijnPath danglingLink, DeBruijnNode node, bool sameOrientation, bool removeLast)
{
    if (removeLast)
    {
        danglingLink.PathNodes.Remove(node);
    }

    DeBruijnPath tracedLink;
    if (danglingLink.PathNodes.Count > 0)
    {
        // Extend existing link.
        tracedLink = TraceDanglingExtensionLink(isForwardDirection, danglingLink, node, sameOrientation);
    }
    else if (node.RightExtensionNodesCount == 0)
    {
        // Link is empty and the node is an end-point on its right side.
        tracedLink = TraceDanglingExtensionLink(false, new DeBruijnPath(), node, true);
    }
    else if (node.LeftExtensionNodesCount == 0)
    {
        // Link is empty and the node is an end-point on its left side.
        tracedLink = TraceDanglingExtensionLink(true, new DeBruijnPath(), node, true);
    }
    else
    {
        // Not an end-point. Return length as 0
        return 0;
    }

    if (tracedLink == null)
    {
        return 0;
    }

    return tracedLink.PathNodes.Count;
}
/// <summary>
/// Build graph nodes and edges from list of k-mers.
/// Creates a node for every unique k-mer (and reverse-complement)
/// in the read. Then, generates adjacency information between nodes
/// by computing pairs of nodes that have overlapping regions
/// between node sequences.
/// A producer task turns each accepted sequence into k-mer nodes and a
/// consumer task inserts them into the binary search tree rooted at
/// <c>root</c>; sequences that are not DNA or that contain gap symbols
/// are counted as skipped.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the k-mer length is not in the range 1..32.</exception>
public void Build(IEnumerable<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (this.kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    if (this.kmerLength > 32)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan32);
    }

    // 'using' guarantees the collection is disposed even when a task fails
    // and Task.WaitAll throws.
    using (BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>())
    {
        Task createKmers = Task.Factory.StartNew(() =>
        {
            try
            {
                IAlphabet alphabet = Alphabets.DNA;
                HashSet<byte> gapSymbols;
                alphabet.TryGetGapSymbols(out gapSymbols);

                // Generate the kmers from the sequences
                foreach (ISequence sequence in sequences)
                {
                    // if the sequence alphabet is not of type DNA then ignore it.
                    if (sequence.Alphabet != Alphabets.DNA)
                    {
                        Interlocked.Increment(ref this.skippedSequencesCount);
                        Interlocked.Increment(ref this.processedSequencesCount);
                        continue;
                    }

                    // if the sequence contains any gap symbols then ignore the sequence.
                    bool skipSequence = false;
                    foreach (byte symbol in gapSymbols)
                    {
                        for (long index = 0; index < sequence.Count; ++index)
                        {
                            if (sequence[index] == symbol)
                            {
                                skipSequence = true;
                                break;
                            }
                        }

                        if (skipSequence)
                        {
                            break;
                        }
                    }

                    if (skipSequence)
                    {
                        Interlocked.Increment(ref this.skippedSequencesCount);
                        Interlocked.Increment(ref this.processedSequencesCount);
                        continue;
                    }

                    // While more than 2 million nodes are waiting to be inserted,
                    // back off in 5 millisecond steps so the consumer task can
                    // drain the collection. This avoids an OutOfMemoryException.
                    while (kmerDataCollection.Count > 2000000)
                    {
                        System.Threading.Thread.Sleep(5);
                    }

                    long count = sequence.Count;

                    // generate the kmers from each sequence
                    for (long i = 0; i <= count - this.kmerLength; ++i)
                    {
                        IKmerData kmerData = this.GetNewKmerData();
                        bool orientation = kmerData.SetKmerData(sequence, i, this.kmerLength);
                        kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                    }

                    Interlocked.Increment(ref this.processedSequencesCount);
                }
            }
            finally
            {
                // Always signal completion. If the producer failed without
                // doing so, the consumer would block forever in TryTake and
                // Task.WaitAll would never return to surface the exception.
                kmerDataCollection.CompleteAdding();
            }
        });

        Task buildKmers = Task.Factory.StartNew(() =>
        {
            while (!kmerDataCollection.IsCompleted)
            {
                DeBruijnNode newNode = null;
                if (!kmerDataCollection.TryTake(out newNode, -1))
                {
                    // Nothing taken: the collection was completed while waiting.
                    continue;
                }

                // Tree Node Creation
                if (this.root == null)
                {
                    // first element being added becomes the root of the tree
                    this.root = newNode;
                    this.NodeCount++;
                    continue;
                }

                int result = 0;
                DeBruijnNode temp = this.root;
                DeBruijnNode parent = this.root;

                // Search the tree where the new node should be inserted
                while (temp != null)
                {
                    result = newNode.NodeValue.CompareTo(temp.NodeValue);
                    if (result == 0)
                    {
                        // Duplicate k-mer: bump the occurrence counter, saturating
                        // at 255, and always stop searching. The previous
                        // '<= 255' guard either wrapped a byte-sized counter back
                        // to 0 or, for a wider counter, never left this loop once
                        // the cap was passed.
                        // NOTE(review): KmerCount looks byte-sized (cap of 255, nodes
                        // created with count 1) - confirm against DeBruijnNode.
                        if (temp.KmerCount < 255)
                        {
                            temp.KmerCount++;
                        }

                        break;
                    }

                    parent = temp;
                    temp = result > 0 ? temp.Right : temp.Left;
                }

                // position found (result == 0 means duplicate: nothing to insert)
                if (result > 0)
                {
                    // add as right child
                    parent.Right = newNode;
                    this.NodeCount++;
                }
                else if (result < 0)
                {
                    // add as left child
                    parent.Left = newNode;
                    this.NodeCount++;
                }
            }
        });

        Task.WaitAll(createKmers, buildKmers);
    }

    this.GraphBuildCompleted = true;

    // Generate the links
    this.GenerateLinks();
}
/// <summary>
/// Flags the extension edge that points at the given node as invalid.
/// The right extensions are tried first; the left extensions are tried
/// only when no right extension matched.
/// Not Thread-safe. Use lock at caller if required.
/// </summary>
/// <param name="node">Node for which extension is to be made invalid.</param>
public void MarkExtensionInvalid(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    bool matchedRightExtension = this.MarkRightExtensionAsInvalid(node);
    if (matchedRightExtension)
    {
        return;
    }

    this.MarkLeftExtensionAsInvalid(node);
}
/// <summary>
/// Clears every extension slot whose node is marked for deletion and
/// decrements the matching extension count. Does nothing when this node
/// is itself marked for deletion.
/// </summary>
public void RemoveMarkedExtensions()
{
    // A node that is itself marked for deletion needs no update.
    if (!this.IsMarkedForDelete)
    {
        this.PruneDeletedRightExtensions();
        this.PruneDeletedLeftExtensions();
    }
}

/// <summary>
/// Clears each right-extension slot (0 to 3, in order) whose node is marked
/// for deletion. Only the count decrement is performed under the lock.
/// </summary>
private void PruneDeletedRightExtensions()
{
    if (this.RightExtension0 != null)
    {
        if (this.RightExtension0.IsMarkedForDelete)
        {
            this.RightExtension0 = null;
            lock (this)
            {
                this.RightExtensionNodesCount--;
            }
        }
    }

    if (this.RightExtension1 != null)
    {
        if (this.RightExtension1.IsMarkedForDelete)
        {
            this.RightExtension1 = null;
            lock (this)
            {
                this.RightExtensionNodesCount--;
            }
        }
    }

    if (this.RightExtension2 != null)
    {
        if (this.RightExtension2.IsMarkedForDelete)
        {
            this.RightExtension2 = null;
            lock (this)
            {
                this.RightExtensionNodesCount--;
            }
        }
    }

    if (this.RightExtension3 != null)
    {
        if (this.RightExtension3.IsMarkedForDelete)
        {
            this.RightExtension3 = null;
            lock (this)
            {
                this.RightExtensionNodesCount--;
            }
        }
    }
}

/// <summary>
/// Clears each left-extension slot (0 to 3, in order) whose node is marked
/// for deletion. Only the count decrement is performed under the lock.
/// </summary>
private void PruneDeletedLeftExtensions()
{
    if (this.LeftExtension0 != null)
    {
        if (this.LeftExtension0.IsMarkedForDelete)
        {
            this.LeftExtension0 = null;
            lock (this)
            {
                this.LeftExtensionNodesCount--;
            }
        }
    }

    if (this.LeftExtension1 != null)
    {
        if (this.LeftExtension1.IsMarkedForDelete)
        {
            this.LeftExtension1 = null;
            lock (this)
            {
                this.LeftExtensionNodesCount--;
            }
        }
    }

    if (this.LeftExtension2 != null)
    {
        if (this.LeftExtension2.IsMarkedForDelete)
        {
            this.LeftExtension2 = null;
            lock (this)
            {
                this.LeftExtensionNodesCount--;
            }
        }
    }

    if (this.LeftExtension3 != null)
    {
        if (this.LeftExtension3.IsMarkedForDelete)
        {
            this.LeftExtension3 = null;
            lock (this)
            {
                this.LeftExtensionNodesCount--;
            }
        }
    }
}
/// <summary>
/// Follows a set of diverging paths in the given direction, growing every
/// path by one node per round. Tracing stops when all paths end on the
/// same node, when the path length threshold is exceeded, when any path
/// end does not have exactly one extension, or when a path loops back
/// onto itself.
/// If the paths converge, a path cluster holding all of them is appended
/// to the list of redundant paths.
/// </summary>
/// <param name="startNode">Node at starting point of divergence.</param>
/// <param name="divergingNodes">List of diverging nodes.</param>
/// <param name="isForwardExtension">Bool indicating direction of divergence.</param>
/// <param name="redundantPaths">List of redundant paths.</param>
private void TraceDivergingExtensionPaths(
    DeBruijnNode startNode,
    Dictionary<DeBruijnNode, bool> divergingNodes,
    bool isForwardExtension,
    List<DeBruijnPathList> redundantPaths)
{
    List<PathWithOrientation> branches = divergingNodes
        .Select(entry => new PathWithOrientation(startNode, entry.Key, entry.Value))
        .ToList();

    // Every branch starts with two nodes; grow them until the threshold is passed.
    for (int branchLength = 2; branchLength <= this.pathLengthThreshold; branchLength++)
    {
        // Every branch end must have exactly one extension in the walking
        // direction: zero means a dead end, more than one means the branch
        // diverges further. Either way this is not a redundant path.
        foreach (PathWithOrientation branch in branches)
        {
            DeBruijnNode tip = branch.Nodes.Last();
            bool walkLeft = isForwardExtension ^ branch.IsSameOrientation;
            if ((walkLeft ? tip.LeftExtensionNodesCount : tip.RightExtensionNodesCount) != 1)
            {
                return;
            }
        }

        // Grow each branch by one node, tracking whether all new ends coincide.
        bool allBranchesMeet = true;
        foreach (PathWithOrientation branch in branches)
        {
            DeBruijnNode tip = branch.Nodes.Last();

            Dictionary<DeBruijnNode, bool> candidates;
            if (isForwardExtension ^ branch.IsSameOrientation)
            {
                candidates = tip.GetLeftExtensionNodesWithOrientation();
            }
            else
            {
                candidates = tip.GetRightExtensionNodesWithOrientation();
            }

            KeyValuePair<DeBruijnNode, bool> step = candidates.First();
            if (branch.Nodes.Contains(step.Key))
            {
                // Loop in path
                return;
            }

            // Update path orientation: it stays "same" only when the current
            // orientation flag and the edge's flag agree.
            branch.IsSameOrientation = branch.IsSameOrientation == step.Value;
            branch.Nodes.Add(step.Key);

            // The first branch has already been extended in this round, so its
            // last node is the reference end that all other branches must reach.
            if (allBranchesMeet && step.Key != branches.First().Nodes.Last())
            {
                allBranchesMeet = false;
            }
        }

        if (allBranchesMeet)
        {
            // All branches share the same end node: record them as redundant.
            lock (redundantPaths)
            {
                redundantPaths.Add(new DeBruijnPathList(branches.Select(p => new DeBruijnPath(p.Nodes))));
            }

            return;
        }
    }
}
/// <summary>
/// Initializes a new instance of the DeBruijnPath class whose node list
/// contains only the specified node.
/// </summary>
/// <param name="node">Graph node.</param>
public DeBruijnPath(DeBruijnNode node)
{
    List<DeBruijnNode> nodes = new List<DeBruijnNode>();
    nodes.Add(node);
    this.path = nodes;
}
/// <summary>
/// Builds a DNA sequence from the original symbols of the specified node,
/// using the graph's k-mer length.
/// </summary>
/// <param name="node">DeBruijn node.</param>
/// <returns>Returns an instance of sequence.</returns>
public ISequence GetNodeSequence(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    var symbols = node.GetOriginalSymbols(KmerLength);
    ISequence nodeSequence = new Sequence(Alphabets.DNA, symbols);
    return nodeSequence;
}