/// <summary>
/// Argument guard shared by the node-taking methods: rejects a null node.
/// </summary>
/// <param name="node">Node to validate</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
private static void ValidateNode(DeBruijnNode node)
{
    bool isMissing = node == null;
    if (isMissing)
    {
        throw new ArgumentNullException("node");
    }
}
/// <summary>
/// Reconstructs the kmer sequence that the given node stands for.
/// The node stores only a sequence index, a start position and a kmer length;
/// the symbols themselves are read from the matching base sequence.
/// </summary>
/// <param name="node">Graph Node</param>
/// <returns>
/// The base sequence itself when the kmer spans all of it,
/// otherwise the sub-range of the base sequence covered by the kmer.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when the node's sequence index does not address a stored base sequence,
/// or when its kmer window does not fit inside that base sequence.
/// </exception>
public ISequence GetNodeSequence(DeBruijnNode node)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // The node must point at one of the stored base sequences.
    int index = node.SequenceIndex;
    bool indexInRange = index >= 0 && index < _baseSequence.Count;
    if (!indexInRange)
    {
        throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerIndexOutOfRange);
    }

    // The kmer window [start, start + KmerLength) must lie inside that sequence.
    ISequence source = _baseSequence[index];
    int start = node.KmerPosition;
    bool windowFits = start >= 0 && start + node.KmerLength <= source.Count;
    if (!windowFits)
    {
        throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerPositionOutOfRange);
    }

    // When the kmer covers the whole base sequence, hand it back without slicing.
    bool coversWholeSequence = start == 0 && source.Count == node.KmerLength;
    if (coversWholeSequence)
    {
        return source;
    }

    return source.Range(start, node.KmerLength);
}
/// <summary>
/// Marks the extension edge leading to the given node as invalid.
/// The right-extension map is consulted first; the left-extension map is
/// consulted only if the node is not a right extension. Nothing happens
/// when the node is in neither map.
/// Not Thread-safe. Use lock at caller if required.
/// </summary>
/// <param name="node">Node for which extension is to be made invalid</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void MarkExtensionInvalid(DeBruijnNode node)
{
    ValidateNode(node);

    // One TryGetValue per map replaces ContainsKey + indexer (two hash lookups).
    // Short-circuit evaluation keeps the right-before-left precedence.
    DeBruijnEdge edge;
    if (_rightEndExtensionNodes.TryGetValue(node, out edge)
        || _leftEndExtensionNodes.TryGetValue(node, out edge))
    {
        edge.IsValid = false;
    }
}
/// <summary>
/// Registers the given node as a right extension of this node.
/// If no edge to the node exists yet, one is created with the given orientation.
/// If an edge already exists, the given orientation is XOR-ed into the stored
/// orientation flag.
/// Not thread-safe. Use lock at caller if required.
/// </summary>
/// <param name="node">Node to add right-extension to</param>
/// <param name="isSameOrientation">Orientation of connecting edge</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void AddRightEndExtension(DeBruijnNode node, bool isSameOrientation)
{
    ValidateNode(node);

    DeBruijnEdge edge;
    if (_rightEndExtensionNodes.TryGetValue(node, out edge))
    {
        // Update the edge that TryGetValue already fetched instead of
        // looking it up a second time through the indexer.
        // NOTE(review): XOR means adding the same orientation twice flips the
        // stored flag to false - confirm this merge rule is intended.
        edge.IsSameOrientation ^= isSameOrientation;
    }
    else
    {
        _rightEndExtensionNodes[node] = new DeBruijnEdge(isSameOrientation);
    }
}
/// <summary>
/// Deletes the extension edge leading to the given node.
/// Removal is attempted on the right-extension map first; the left-extension
/// map is only touched when the node was not a right extension.
/// Each map is modified while holding a lock on that map.
/// </summary>
/// <param name="node">Node for which extension is to be removed</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null.</exception>
public void RemoveExtensionThreadSafe(DeBruijnNode node)
{
    ValidateNode(node);

    bool removedFromRight;
    lock (_rightEndExtensionNodes)
    {
        removedFromRight = _rightEndExtensionNodes.Remove(node);
    }

    if (removedFromRight)
    {
        return;
    }

    lock (_leftEndExtensionNodes)
    {
        _leftEndExtensionNodes.Remove(node);
    }
}
/// <summary>
/// Turns the graph into a contig graph.
/// One node is created per contig (from the contig's length and its index),
/// adjacency between the contig nodes is computed by GenerateContigAdjacency,
/// and finally the graph's base sequences and node set are replaced by the
/// contigs and the new contig nodes.
/// </summary>
/// <param name="contigs">List of contig data</param>
/// <param name="kmerLength">Kmer length</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> is zero or negative.</exception>
public void BuildContigGraph(IList<ISequence> contigs, int kmerLength)
{
    if (contigs == null)
    {
        throw new ArgumentNullException("contigs");
    }

    if (kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    // Build the contig nodes in parallel; each iteration writes only its own slot.
    int contigCount = contigs.Count;
    DeBruijnNode[] nodes = new DeBruijnNode[contigCount];
    Parallel.For(0, contigCount, i =>
    {
        nodes[i] = new DeBruijnNode(contigs[i].Count, i);
    });

    GenerateContigAdjacency(contigs, kmerLength, nodes);

    // Swap the graph contents over to the contig-level data.
    _baseSequence = new List<ISequence>(contigs);
    _kmerNodes = new HashSet<DeBruijnNode>(nodes);
}
/// <summary>
/// Computes the left and right extension edges of every node in the map.
/// For each kmer, the candidate neighbours obtained by shifting the kmer by one
/// symbol (one candidate per DNA symbol, on each side) are looked up in the map,
/// both directly and as their reverse complement, and every hit is recorded
/// as an extension of the node.
/// </summary>
/// <param name="kmerNodeMap">Graph Nodes mapped by sequence string</param>
/// <param name="kmerLength">Kmer Length</param>
private void GenerateAdjacency(Dictionary<string, KmerDataGraphNodePair> kmerNodeMap, int kmerLength)
{
    // An empty map has no adjacency to compute.
    if (kmerNodeMap.Count == 0)
    {
        return;
    }

    int overlapLength = kmerLength - 1;

    // Every kmer has the same length, so each worker thread allocates a single pair
    // of k-sized scratch buffers and reuses it for all entries it processes.
    using (ThreadLocal<Tuple<char[], char[]>> scratch = new ThreadLocal<Tuple<char[], char[]>>(
        () => Tuple.Create(new char[kmerLength], new char[kmerLength])))
    {
        Parallel.ForEach(kmerNodeMap, entry =>
        {
            Tuple<char[], char[]> buffers = scratch.Value;
            char[] candidate = buffers.Item1;
            char[] candidateRC = buffers.Item2;

            // The map key is either the kmer or its reverse complement, depending on
            // the orientation flag. Derive the other form; the first scratch buffer
            // is lent to GetReverseComplement before it is reused for candidates below.
            string forward;
            string reverse;
            if (entry.Value.KeyHasSameOrientation)
            {
                forward = entry.Key;
                reverse = forward.GetReverseComplement(candidate);
            }
            else
            {
                reverse = entry.Key;
                forward = reverse.GetReverseComplement(candidate);
            }

            // Only this entry's node is mutated below. The non-thread-safe Add* methods
            // are used on the assumption that each parallel iteration works with a
            // different node (as stated by the original implementation).
            DeBruijnNode current = entry.Value.Node;

            // Right (forward) extensions.
            // A right neighbour either starts with the (k-1)-symbol suffix of this kmer,
            // or ends with the reverse complement of that suffix.
            // Lay out both (k-1)-symbol overlaps, leaving one free slot for the new symbol.
            forward.CopyTo(1, candidate, 0, overlapLength);
            reverse.CopyTo(0, candidateRC, 1, overlapLength);
            for (int s = 0; s < DnaSymbols.Length; s++)
            {
                KmerDataGraphNodePair neighbour;

                // Try the candidate as-is: overlap followed by the new symbol.
                candidate[kmerLength - 1] = DnaSymbols[s];
                if (kmerNodeMap.TryGetValue(new string(candidate), out neighbour))
                {
                    current.AddRightEndExtension(neighbour.Node, neighbour.KeyHasSameOrientation);
                    continue;
                }

                // Otherwise try its reverse complement: complemented symbol, then overlap.
                // A hit here means the key orientation has to be inverted for the edge.
                candidateRC[0] = DnaSymbolsComplement[s];
                if (kmerNodeMap.TryGetValue(new string(candidateRC), out neighbour))
                {
                    current.AddRightEndExtension(neighbour.Node, !neighbour.KeyHasSameOrientation);
                }
            }

            // Left (backward) extensions.
            // A left neighbour either ends with the (k-1)-symbol prefix of this kmer,
            // or starts with the reverse complement of that prefix.
            forward.CopyTo(0, candidate, 1, overlapLength);
            reverse.CopyTo(1, candidateRC, 0, overlapLength);
            for (int s = 0; s < DnaSymbols.Length; s++)
            {
                KmerDataGraphNodePair neighbour;

                // Try the candidate as-is: new symbol followed by the overlap.
                candidate[0] = DnaSymbols[s];
                if (kmerNodeMap.TryGetValue(new string(candidate), out neighbour))
                {
                    current.AddLeftEndExtension(neighbour.Node, neighbour.KeyHasSameOrientation);
                    continue;
                }

                // Otherwise try its reverse complement: overlap, then complemented symbol.
                candidateRC[kmerLength - 1] = DnaSymbolsComplement[s];
                if (kmerNodeMap.TryGetValue(new string(candidateRC), out neighbour))
                {
                    current.AddLeftEndExtension(neighbour.Node, !neighbour.KeyHasSameOrientation);
                }
            }
        });
    }
}
/// <summary>
/// Creates a path whose node list initially holds only the given node.
/// </summary>
/// <param name="node">Graph node</param>
public DeBruijnPath(DeBruijnNode node)
{
    List<DeBruijnNode> nodes = new List<DeBruijnNode>();
    nodes.Add(node);
    _path = nodes;
}