コード例 #1
0
        /// <summary>
        /// Looks up the tree node whose value compares equal to the given k-mer.
        /// </summary>
        /// <param name="kmerValue">K-mer value to look for.</param>
        /// <returns>
        /// The matching node, or null when <paramref name="kmerValue"/> is null
        /// or no node in the tree compares equal to it.
        /// </returns>
        public DeBruijnNode SearchTree(IKmerData kmerValue)
        {
            // Defensive guard: callers are not expected to pass null.
            if (kmerValue == null)
            {
                return null;
            }

            DeBruijnNode current = this.root;
            while (current != null)
            {
                int comparison = kmerValue.CompareTo(current.NodeValue);
                if (comparison == 0)
                {
                    // Exact match.
                    return current;
                }

                // Smaller values live in the left sub-tree, larger ones in the right.
                current = comparison < 0 ? current.Left : current.Right;
            }

            // Walked off the tree without finding the value.
            return null;
        }
コード例 #2
0
        /// <summary>
        /// Compares the packed k-mer value of this instance with that of another IKmerData instance.
        /// </summary>
        /// <param name="kmer">Instance to compare against; it is cast to KmerData32.</param>
        /// <returns>
        /// -1 when this instance is less than <paramref name="kmer"/>,
        /// 0 when both are equal, and 1 when this instance is greater.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="kmer"/> is null.</exception>
        public int CompareTo(IKmerData kmer)
        {
            if (kmer == null)
            {
                throw new ArgumentNullException("kmer");
            }

            ulong other = ((KmerData32)kmer).kmerData;

            if (this.kmerData < other)
            {
                return -1;
            }

            return this.kmerData > other ? 1 : 0;
        }
コード例 #3
0
 /// <summary>
 /// Creates a node that stores the given k-mer value, orientation flag and count.
 /// </summary>
 /// <param name="value">K-mer data assigned to NodeValue.</param>
 /// <param name="orientation">Flag assigned to NodeDataOrientation.</param>
 /// <param name="count">Initial value assigned to KmerCount.</param>
 public DeBruijnNode(IKmerData value, bool orientation, byte count)
 {
     // Assignment order is kept as-is in case the property setters interact.
     NodeValue = value;
     KmerCount = count;
     NodeDataOrientation = orientation;
 }
コード例 #4
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// The calling thread generates k-mers and queues them; a background task
        /// drains the queue and inserts the nodes into the binary search tree.
        /// Sequences containing an ambiguous or gap symbol are skipped entirely.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the k-mer length is not positive.</exception>
        /// <exception cref="InvalidOperationException">Thrown when <paramref name="sequences"/> is empty.</exception>
        public void Build(IEnumerable<ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (this.kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            // Resolve the alphabet data BEFORE the consumer task is started: First()
            // throws on an empty input, and throwing after the task is running would
            // leave that task blocked forever on a queue nobody completes.
            IAlphabet alphabet = sequences.First().Alphabet;

            byte[] symbolMap = alphabet.GetSymbolValueMap();
            HashSet<byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
            HashSet<byte> gapSymbols;

            // The boolean result is not needed, but the out value may be left null
            // when the alphabet reports no gap symbols, so it is null-checked below.
            alphabet.TryGetGapSymbols(out gapSymbols);

            BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>();

            // Consumer: takes queued nodes and inserts them into the tree.
            Task buildKmers = Task.Factory.StartNew(() =>
            {
                while (!kmerDataCollection.IsCompleted)
                {
                    DeBruijnNode newNode = null;
                    if (kmerDataCollection.TryTake(out newNode, -1))
                    {
                        // Tree Node Creation
                        if (this.root == null)   // first element being added
                        {
                            this.root = newNode; // set node as root of the tree
                            this.NodeCount++;
                            continue;
                        }

                        int result = 0;
                        DeBruijnNode temp = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                        {
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            if (result == 0)
                            {
                                // Duplicate k-mer: bump the count, saturating at the byte
                                // maximum instead of wrapping back to zero, and always
                                // leave the loop (temp is not advanced on this path).
                                if (temp.KmerCount < byte.MaxValue)
                                {
                                    temp.KmerCount++;
                                }

                                break;
                            }

                            // Descend: larger values go right, smaller values go left.
                            parent = temp;
                            temp = result > 0 ? temp.Right : temp.Left;
                        }

                        // position found (result == 0 means duplicate, nothing to attach)
                        if (result > 0) // add as right child
                        {
                            parent.Right = newNode;
                            this.NodeCount++;
                        }
                        else if (result < 0) // add as left child
                        {
                            parent.Left = newNode;
                            this.NodeCount++;
                        }
                    } // End of tree node creation.
                }
            });

            try
            {
                // Generate the kmers from the sequences
                foreach (ISequence sequence in sequences)
                {
                    // While more than 2 million nodes are queued, back off in 5 ms naps
                    // so the consumer task can catch up. This bounds memory usage and
                    // avoids an OutOfMemoryException.
                    while (kmerDataCollection.Count > 2000000)
                    {
                        System.Threading.Thread.Sleep(5);
                    }

                    long count = sequence.Count;
                    byte[] convertedSymbols = new byte[count];
                    bool skipSequence = false;

                    for (long index = 0; index < count; index++)
                    {
                        byte converted = symbolMap[sequence[index]];
                        convertedSymbols[index] = converted;

                        if (ambiguousSymbols.Contains(converted)
                            || (gapSymbols != null && gapSymbols.Contains(converted)))
                        {
                            skipSequence = true;
                            break;
                        }
                    }

                    if (skipSequence)
                    {
                        continue;
                    }

                    Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

                    // generate the kmers from each sequence; a sequence shorter than
                    // the k-mer length yields none because the bound is negative.
                    for (long i = 0; i <= count - this.kmerLength; ++i)
                    {
                        IKmerData kmerData = this.GetNewKmerData();
                        bool orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                        kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                    }
                }
            }
            finally
            {
                // Always signal completion, even when k-mer generation throws;
                // otherwise the consumer task would block forever in TryTake.
                kmerDataCollection.CompleteAdding();
            }

            try
            {
                Task.WaitAll(buildKmers);
            }
            finally
            {
                // The consumer has finished (or faulted) once WaitAll returns or throws,
                // so the collection is no longer in use.
                kmerDataCollection.Dispose();
            }

            // Generate the links
            this.GenerateLinks();
        }
コード例 #5
0
        /// <summary>
        /// Connects every node of the graph to its neighbouring nodes.
        /// </summary>
        /// <remarks>
        /// For each node, every one-symbol extension to the right and to the left is
        /// looked up in the tree. When the extension itself is not present, the
        /// alternative candidate built from the reverse-complement string is looked up instead.
        /// Nodes are processed in parallel.
        /// </remarks>
        private void GenerateLinks()
        {
            Parallel.ForEach(
                this.GetNodes(),
                node =>
            {
                // Scratch k-mer reused for every lookup made on behalf of this node.
                IKmerData probe = GetNewKmerData();

                bool storedForward = node.NodeDataOrientation;
                string stored = Encoding.Default.GetString(node.NodeValue.GetKmerData(this.kmerLength));
                string storedRC = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(this.KmerLength));

                // The orientation flag selects which of the two strings plays the
                // "k-mer" role and which plays the "reverse complement" role.
                string kmerString = storedForward ? stored : storedRC;
                string kmerStringRC = storedForward ? storedRC : stored;

                DeBruijnNode match;

                // Right Extensions
                string overlap = kmerString.Substring(1);
                string overlapRC = kmerStringRC.Substring(0, kmerLength - 1);
                for (int i = 0; i < DnaSymbols.Length; i++)
                {
                    string candidate = overlap + DnaSymbols[i];
                    probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                    match = this.SearchTree(probe);
                    if (match != null)
                    {
                        node.SetExtensionNodes(true, match.NodeDataOrientation, match);
                        continue;
                    }

                    // Not found: try the candidate built from the reverse-complement string.
                    string candidateRC = DnaSymbolsComplement[i] + overlapRC;
                    probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                    match = this.SearchTree(probe);
                    if (match != null)
                    {
                        node.SetExtensionNodes(true, !match.NodeDataOrientation, match);
                    }
                }

                // Left Extensions
                overlap = kmerString.Substring(0, kmerLength - 1);
                overlapRC = kmerStringRC.Substring(1);
                for (int i = 0; i < DnaSymbols.Length; i++)
                {
                    string candidate = DnaSymbols[i] + overlap;
                    probe.SetKmerData(Encoding.Default.GetBytes(candidate), this.kmerLength);
                    match = this.SearchTree(probe);
                    if (match != null)
                    {
                        node.SetExtensionNodes(false, match.NodeDataOrientation, match);
                        continue;
                    }

                    // Not found: try the candidate built from the reverse-complement string.
                    string candidateRC = overlapRC + DnaSymbolsComplement[i];
                    probe.SetKmerData(Encoding.Default.GetBytes(candidateRC), this.kmerLength);
                    match = this.SearchTree(probe);
                    if (match != null)
                    {
                        node.SetExtensionNodes(false, !match.NodeDataOrientation, match);
                    }
                }
            });
        }
コード例 #6
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// One task generates k-mers and queues them; a second task drains the queue
        /// and inserts the nodes into the binary search tree. Sequences whose alphabet
        /// is not DNA, or which contain a gap symbol, are counted as skipped.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the k-mer length is not positive or is greater than 32.</exception>
        /// <exception cref="AggregateException">Thrown when either background task fails.</exception>
        public void Build(IEnumerable<ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (this.kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            if (this.kmerLength > 32)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan32);
            }

            BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>();

            // Producer: turns every accepted sequence into queued k-mer nodes.
            Task createKmers = Task.Factory.StartNew(() =>
            {
                try
                {
                    IAlphabet alphabet = Alphabets.DNA;

                    // The boolean result is not needed, but the out value may be left
                    // null when no gap symbols are reported, so it is null-checked below.
                    HashSet<byte> gapSymbols;
                    alphabet.TryGetGapSymbols(out gapSymbols);

                    // Generate the kmers from the sequences
                    foreach (ISequence sequence in sequences)
                    {
                        // if the sequence alphabet is not of type DNA then ignore it.
                        if (sequence.Alphabet != Alphabets.DNA)
                        {
                            Interlocked.Increment(ref this.skippedSequencesCount);
                            Interlocked.Increment(ref this.processedSequencesCount);
                            continue;
                        }

                        // if the sequence contains any gap symbols then ignore the sequence.
                        // Single pass over the sequence with a set lookup per symbol.
                        bool skipSequence = false;
                        if (gapSymbols != null)
                        {
                            for (long index = 0; index < sequence.Count; ++index)
                            {
                                if (gapSymbols.Contains(sequence[index]))
                                {
                                    skipSequence = true;
                                    break;
                                }
                            }
                        }

                        if (skipSequence)
                        {
                            Interlocked.Increment(ref this.skippedSequencesCount);
                            Interlocked.Increment(ref this.processedSequencesCount);
                            continue;
                        }

                        // While more than 2 million nodes are queued, back off in 5 ms
                        // naps so the consumer task can catch up. This bounds memory
                        // usage and avoids an OutOfMemoryException.
                        while (kmerDataCollection.Count > 2000000)
                        {
                            System.Threading.Thread.Sleep(5);
                        }

                        long count = sequence.Count;

                        // generate the kmers from each sequence; a sequence shorter than
                        // the k-mer length yields none because the bound is negative.
                        for (long i = 0; i <= count - this.kmerLength; ++i)
                        {
                            IKmerData kmerData = this.GetNewKmerData();
                            bool orientation = kmerData.SetKmerData(sequence, i, this.kmerLength);
                            kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                        }

                        Interlocked.Increment(ref this.processedSequencesCount);
                    }
                }
                finally
                {
                    // Always signal completion, even when k-mer generation throws;
                    // otherwise the consumer blocks forever and WaitAll never returns.
                    kmerDataCollection.CompleteAdding();
                }
            });

            // Consumer: takes queued nodes and inserts them into the tree.
            Task buildKmers = Task.Factory.StartNew(() =>
            {
                while (!kmerDataCollection.IsCompleted)
                {
                    DeBruijnNode newNode = null;
                    if (kmerDataCollection.TryTake(out newNode, -1))
                    {
                        // Tree Node Creation
                        if (this.root == null)   // first element being added
                        {
                            this.root = newNode; // set node as root of the tree
                            this.NodeCount++;
                            continue;
                        }

                        int result = 0;
                        DeBruijnNode temp = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                        {
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            if (result == 0)
                            {
                                // Duplicate k-mer: bump the count, saturating at the byte
                                // maximum instead of wrapping back to zero, and always
                                // leave the loop (temp is not advanced on this path).
                                if (temp.KmerCount < byte.MaxValue)
                                {
                                    temp.KmerCount++;
                                }

                                break;
                            }

                            // Descend: larger values go right, smaller values go left.
                            parent = temp;
                            temp = result > 0 ? temp.Right : temp.Left;
                        }

                        // position found (result == 0 means duplicate, nothing to attach)
                        if (result > 0) // add as right child
                        {
                            parent.Right = newNode;
                            this.NodeCount++;
                        }
                        else if (result < 0) // add as left child
                        {
                            parent.Left = newNode;
                            this.NodeCount++;
                        }
                    } // End of tree node creation.
                }
            });

            try
            {
                Task.WaitAll(createKmers, buildKmers);
            }
            finally
            {
                // Both tasks have finished (or faulted) once WaitAll returns or throws,
                // so the collection is no longer in use.
                kmerDataCollection.Dispose();
            }

            this.GraphBuildCompleted = true;

            // Generate the links
            this.GenerateLinks();
        }