Esempio n. 1
0
 /// <summary>
 /// Guard helper: rejects a null node reference.
 /// </summary>
 /// <param name="node">Node reference to verify</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null</exception>
 private static void ValidateNode(DeBruijnNode node)
 {
     bool isMissing = node == null;
     if (isMissing)
     {
         throw new ArgumentNullException("node");
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Returns the kmer sequence that the given node represents.
        /// The node carries an index into the stored base sequences plus a
        /// start position and length; the kmer is that slice of the base sequence.
        /// There should be at least one valid position in the node.
        /// When the slice spans the whole base sequence, the base sequence
        /// itself is returned instead of a sub-range.
        /// </summary>
        /// <param name="node">Graph Node</param>
        /// <returns>Sequence associated with input node</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when the node's sequence index or kmer position/length
        /// does not fit the stored base sequences
        /// </exception>
        public ISequence GetNodeSequence(DeBruijnNode node)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            // The sequence index must address an entry of the base sequence list.
            int index = node.SequenceIndex;
            bool indexInRange = index >= 0 && index < _baseSequence.Count;
            if (!indexInRange)
            {
                throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerIndexOutOfRange);
            }

            // The kmer window [start, start + length) must lie inside the base sequence.
            ISequence source = _baseSequence[index];
            int start = node.KmerPosition;
            if (start < 0 || start + node.KmerLength > source.Count)
            {
                throw new ArgumentOutOfRangeException("node", Properties.Resource.KmerPositionOutOfRange);
            }

            // Only slice when the kmer is a proper part of the base sequence.
            if (start != 0 || source.Count != node.KmerLength)
            {
                return source.Range(start, node.KmerLength);
            }

            return source;
        }
Esempio n. 3
0
 /// <summary>
 /// Marks the extension edge stored for the given node as invalid.
 /// The right extensions are searched first; the left extensions are
 /// consulted only when the node is not a right extension.
 /// Nothing happens when the node is in neither collection.
 /// Not Thread-safe. Use lock at caller if required.
 /// </summary>
 /// <param name="node">Node for which extension is to be made invalid</param>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null</exception>
 public void MarkExtensionInvalid(DeBruijnNode node)
 {
     ValidateNode(node);

     // TryGetValue fetches the edge in a single lookup (instead of
     // ContainsKey followed by the indexer). The || short-circuits, so the
     // left extensions are only searched when the right lookup misses.
     DeBruijnEdge edge;
     if (_rightEndExtensionNodes.TryGetValue(node, out edge)
         || _leftEndExtensionNodes.TryGetValue(node, out edge))
     {
         edge.IsValid = false;
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Registers a right-extension edge to the given node.
        /// If no edge exists for the node, a new one is created with the
        /// supplied orientation; otherwise the existing edge's orientation
        /// flag is XOR-combined with the supplied one.
        /// Not thread-safe. Use lock at caller if required.
        /// </summary>
        /// <param name="node">Node to add right-extension to</param>
        /// <param name="isSameOrientation">Orientation of connecting edge</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null</exception>
        public void AddRightEndExtension(DeBruijnNode node, bool isSameOrientation)
        {
            ValidateNode(node);

            DeBruijnEdge edge;
            if (_rightEndExtensionNodes.TryGetValue(node, out edge))
            {
                // Use the edge TryGetValue already returned rather than
                // looking the node up a second time through the indexer.
                // NOTE(review): the orientation is XOR-ed with the new flag,
                // not overwritten - confirm this is the intended merge rule.
                edge.IsSameOrientation ^= isSameOrientation;
            }
            else
            {
                _rightEndExtensionNodes[node] = new DeBruijnEdge(isSameOrientation);
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Removes the extension edge stored for the given node.
        /// The right extensions are tried first; the left extensions are
        /// tried only when the node was not removed from the right ones.
        /// Each collection is locked for the duration of its own removal.
        /// </summary>
        /// <param name="node">Node for which extension is to be removed</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="node"/> is null</exception>
        public void RemoveExtensionThreadSafe(DeBruijnNode node)
        {
            ValidateNode(node);

            bool removedFromRight;
            lock (_rightEndExtensionNodes)
            {
                removedFromRight = _rightEndExtensionNodes.Remove(node);
            }

            // Found on the right side: the left collection is left untouched.
            if (removedFromRight)
            {
                return;
            }

            lock (_leftEndExtensionNodes)
            {
                _leftEndExtensionNodes.Remove(node);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Rebuilds this graph as a contig graph.
        /// One node is created per contig (in parallel), adjacency between the
        /// contig nodes is delegated to GenerateContigAdjacency, and finally the
        /// graph's base sequences and node set are replaced by the contig data.
        /// </summary>
        /// <param name="contigs">List of contig data</param>
        /// <param name="kmerLength">Kmer length</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="contigs"/> is null</exception>
        /// <exception cref="ArgumentException">Thrown when <paramref name="kmerLength"/> is not positive</exception>
        public void BuildContigGraph(IList<ISequence> contigs, int kmerLength)
        {
            if (contigs == null)
            {
                throw new ArgumentNullException("contigs");
            }

            if (kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            // One node per contig; each iteration writes only its own array slot.
            int contigCount = contigs.Count;
            DeBruijnNode[] nodes = new DeBruijnNode[contigCount];
            Parallel.For(0, contigCount, i =>
            {
                nodes[i] = new DeBruijnNode(contigs[i].Count, i);
            });

            // Adjacency is computed before the graph fields are swapped below.
            GenerateContigAdjacency(contigs, kmerLength, nodes);

            // Replace the graph contents with the contig sequences and nodes.
            _baseSequence = new List<ISequence>(contigs);
            _kmerNodes = new HashSet<DeBruijnNode>(nodes);
        }
Esempio n. 7
0
        /// <summary>
        /// Generate adjacency information between nodes
        /// by computing overlapping regions between sequences.
        /// For every map entry, the four possible one-symbol extensions on the
        /// right and on the left are looked up in the map (first in forward form,
        /// then as reverse complement), and each hit is recorded on the entry's
        /// node as a right or left extension. Entries are processed in parallel.
        /// </summary>
        /// <param name="kmerNodeMap">Graph Nodes mapped by sequence string</param>
        /// <param name="kmerLength">Kmer Length</param>
        private void GenerateAdjacency(Dictionary <string, KmerDataGraphNodePair> kmerNodeMap, int kmerLength)
        {
            // Nothing to do if there are no nodes.
            if (kmerNodeMap.Count == 0)
            {
                return;
            }

            // Each thread gets its own pair of kmerLength-sized char buffers
            // (created lazily by the ThreadLocal factory), so candidate kmers can be
            // assembled without allocating new arrays for every node.
            using (ThreadLocal <Tuple <char[], char[]> > kmerBuilders = new ThreadLocal <Tuple <char[], char[]> >
                                                                            (() => Tuple.Create <char[], char[]>(new char[kmerLength], new char[kmerLength])))
            {
                Parallel.ForEach(kmerNodeMap, nodeValue =>
                {
                    // The map key is either the node's kmer or its reverse complement,
                    // as indicated by KeyHasSameOrientation; derive the other form.
                    // NOTE(review): Item1 is handed to GetReverseComplement, presumably as
                    // scratch space; it is overwritten below when reused as nextKmer.
                    bool orientation = nodeValue.Value.KeyHasSameOrientation;
                    string kmerString;
                    string kmerStringRC;
                    if (orientation)
                    {
                        kmerString   = nodeValue.Key;
                        kmerStringRC = kmerString.GetReverseComplement(kmerBuilders.Value.Item1);
                    }
                    else
                    {
                        kmerStringRC = nodeValue.Key;
                        kmerString   = kmerStringRC.GetReverseComplement(kmerBuilders.Value.Item1);
                    }
                    DeBruijnNode node = nodeValue.Value.Node;

                    // Buffers for the forward candidate and the reverse-complement candidate.
                    char[] nextKmer   = kmerBuilders.Value.Item1;
                    char[] nextKmerRC = kmerBuilders.Value.Item2;

                    // Query its possible four extensions in the right (forward)
                    // If it exists, set 'right' edge information in current node

                    // The kmer sequence of right extension nodes should either start with
                    // (k-1) length right-substring or end with its reverse complement.
                    // Get required substring from current node's sequence, and
                    // leave one slot free (last slot of nextKmer, first slot of nextKmerRC)
                    // for the symbol that is varied in the loop below.
                    kmerString.CopyTo(1, nextKmer, 0, kmerLength - 1);     // right sub-string
                    kmerStringRC.CopyTo(0, nextKmerRC, 1, kmerLength - 1); // reverse-complement

                    for (int i = 0; i < DnaSymbols.Length; i++)
                    {
                        nextKmer[kmerLength - 1] = DnaSymbols[i];                        // replace last character with dnaChar
                        KmerDataGraphNodePair nextNode;
                        if (kmerNodeMap.TryGetValue(new string(nextKmer), out nextNode)) // check if the kmer exists
                        {
                            // Forward candidate is a key in the map:
                            // add right extension, passing the entry's orientation flag unchanged.
                            // Ok to use unsafe add method since each parallel thread works with a different node
                            node.AddRightEndExtension(nextNode.Node, nextNode.KeyHasSameOrientation);
                        }
                        else
                        {
                            // Forward form not found: try the reverse-complement candidate,
                            // whose first character is the complement of the same symbol.
                            nextKmerRC[0] = DnaSymbolsComplement[i];
                            if (kmerNodeMap.TryGetValue(new string(nextKmerRC), out nextNode))
                            {
                                // Reverse-complement candidate is a key in the map:
                                // add right extension, passing the negated orientation flag.
                                // Ok to use unsafe add method since each parallel thread works with a different node
                                node.AddRightEndExtension(nextNode.Node, !nextNode.KeyHasSameOrientation);
                            }
                        }
                    }

                    // Repeat above exercise for left extensions
                    // The kmer sequence of left extension nodes should either end with
                    // (k-1) length left-substring or start with its reverse complement.
                    // Get required substring from current node's sequence, and
                    // leave one slot free (first slot of nextKmer, last slot of nextKmerRC)
                    // for the symbol that is varied in the loop below.
                    kmerString.CopyTo(0, nextKmer, 1, kmerLength - 1);
                    kmerStringRC.CopyTo(1, nextKmerRC, 0, kmerLength - 1);

                    for (int i = 0; i < DnaSymbols.Length; i++)
                    {
                        nextKmer[0] = DnaSymbols[i];                                     // replace first character with new DNA character
                        KmerDataGraphNodePair nextNode;
                        if (kmerNodeMap.TryGetValue(new string(nextKmer), out nextNode)) // check if the kmer exists
                        {
                            // Forward candidate is a key in the map:
                            // add left extension, passing the entry's orientation flag unchanged.
                            node.AddLeftEndExtension(nextNode.Node, nextNode.KeyHasSameOrientation);
                        }
                        else
                        {
                            // Forward form not found: try the reverse-complement candidate,
                            // whose last character is the complement of the same symbol.
                            nextKmerRC[kmerLength - 1] = DnaSymbolsComplement[i];
                            if (kmerNodeMap.TryGetValue(new string(nextKmerRC), out nextNode))
                            {
                                // Reverse-complement candidate is a key in the map:
                                // add left extension, passing the negated orientation flag.
                                node.AddLeftEndExtension(nextNode.Node, !nextNode.KeyHasSameOrientation);
                            }
                        }
                    }
                });
            }
        }
Esempio n. 8
0
 /// <summary>
 /// Creates a path whose node list initially holds only the supplied node.
 /// </summary>
 /// <param name="node">Graph node</param>
 public DeBruijnPath(DeBruijnNode node)
 {
     // Build the list first, then publish it to the field.
     List<DeBruijnNode> initialNodes = new List<DeBruijnNode>();
     initialNodes.Add(node);
     _path = initialNodes;
 }