Ejemplo n.º 1
        /// <summary>
        /// Adds the links between the nodes of the graph.
        /// </summary>
        /// <remarks>
        /// For each node, the k-mer text and its reverse-complement text are decoded,
        /// every one-symbol right and left extension of both strings is looked up with
        /// <c>SearchTree</c>, and each hit is recorded on the node through
        /// <c>SetExtensionNodes</c>.
        /// </remarks>
        private void GenerateLinks()
                // NOTE(review): this listing appears to have lost its brace-only lines and
                // possibly some statements (e.g. the call that feeds `node` into this lambda).
                // Confirm the exact block structure against the original file.
                node =>
                // Result of the most recent tree lookup; reused for every candidate neighbour.
                DeBruijnNode searchResult = null;
                // Scratch k-mer object that is overwritten with each candidate before searching.
                IKmerData searchNodeValue = GetNewKmerData();
                string kmerString;
                string kmerStringRC;
                // NodeDataOrientation selects which of the two decoded strings is treated as the
                // k-mer and which as its reverse complement.
                // NOTE(review): an `else` between the two assignment pairs below seems to be
                // missing from this listing — as shown, the second pair would always overwrite
                // the first. Also note the mix of `this.kmerLength` and `this.KmerLength`;
                // presumably both refer to the same value — TODO confirm.
                if (node.NodeDataOrientation)
                    kmerString   = Encoding.Default.GetString(node.NodeValue.GetKmerData(this.kmerLength));
                    kmerStringRC = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(this.KmerLength));
                    kmerStringRC = Encoding.Default.GetString(node.NodeValue.GetKmerData(this.kmerLength));
                    kmerString   = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(this.KmerLength));

                string nextKmer;
                string nextKmerRC;

                // Right Extensions
                // Drop the first symbol of the k-mer (and the last symbol of its reverse
                // complement) to obtain the (k-1)-length overlap shared with a right neighbour.
                nextKmer   = kmerString.Substring(1);
                nextKmerRC = kmerStringRC.Substring(0, kmerLength - 1);
                for (int i = 0; i < DnaSymbols.Length; i++)
                    // Candidate neighbour: overlap followed by the i-th symbol.
                    string tmpNextKmer = nextKmer + DnaSymbols[i];
                    searchNodeValue.SetKmerData(Encoding.Default.GetBytes(tmpNextKmer), this.kmerLength);
                    searchResult = this.SearchTree(searchNodeValue);
                    if (searchResult != null)
                        node.SetExtensionNodes(true, searchResult.NodeDataOrientation, searchResult);
                        // Reverse-complement form of the same candidate: complement of the i-th
                        // symbol followed by the reverse-complement overlap. A hit is recorded
                        // with the found node's orientation flag negated.
                        // NOTE(review): whether this lookup is nested inside the `if` above or
                        // follows it cannot be determined from this listing.
                        string tmpnextKmerRC = DnaSymbolsComplement[i] + nextKmerRC;
                        searchNodeValue.SetKmerData(Encoding.Default.GetBytes(tmpnextKmerRC), this.kmerLength);
                        searchResult = this.SearchTree(searchNodeValue);
                        if (searchResult != null)
                            node.SetExtensionNodes(true, !searchResult.NodeDataOrientation, searchResult);

                // Left Extensions
                // Mirror image of the right-extension pass: drop the last symbol of the k-mer
                // (and the first symbol of its reverse complement).
                nextKmer   = kmerString.Substring(0, kmerLength - 1);
                nextKmerRC = kmerStringRC.Substring(1);
                for (int i = 0; i < DnaSymbols.Length; i++)
                    // Candidate neighbour: the i-th symbol followed by the overlap.
                    string tmpNextKmer = DnaSymbols[i] + nextKmer;
                    searchNodeValue.SetKmerData(Encoding.Default.GetBytes(tmpNextKmer), this.kmerLength);
                    searchResult = this.SearchTree(searchNodeValue);
                    if (searchResult != null)
                        node.SetExtensionNodes(false, searchResult.NodeDataOrientation, searchResult);
                        // Reverse-complement form of the same candidate; a hit is recorded with
                        // the found node's orientation flag negated.
                        string tmpNextKmerRC = nextKmerRC + DnaSymbolsComplement[i];
                        searchNodeValue.SetKmerData(Encoding.Default.GetBytes(tmpNextKmerRC), this.kmerLength);
                        searchResult = this.SearchTree(searchNodeValue);
                        if (searchResult != null)
                            node.SetExtensionNodes(false, !searchResult.NodeDataOrientation, searchResult);
Ejemplo n.º 2
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// K-mers are handed from the generating loop to a background task through a
        /// <c>BlockingCollection</c>; the task inserts each one into a binary search tree
        /// rooted at <c>this.root</c>, ordered by <c>NodeValue.CompareTo</c>.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the configured k-mer length is zero or negative.</exception>
        public void Build(IEnumerable <ISequence> sequences)
            // NOTE(review): this listing appears to have lost its brace-only lines and some
            // statements (loop bodies, continue/break, the end of the task lambda). Confirm the
            // exact structure against the original file before relying on these notes.
            if (sequences == null)
                throw new ArgumentNullException("sequences");

            if (this.kmerLength <= 0)
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);

            // Queue through which generated k-mer nodes reach the tree-building task.
            BlockingCollection <DeBruijnNode> kmerDataCollection = new BlockingCollection <DeBruijnNode>();

            // Consumer: drains the queue until it is marked complete and empty.
            Task buildKmers = Task.Factory.StartNew(() =>
                while (!kmerDataCollection.IsCompleted)
                    DeBruijnNode newNode = null;
                    // A timeout of -1 means wait indefinitely for an item.
                    if (kmerDataCollection.TryTake(out newNode, -1))
                        // Tree Node Creation

                        // create a new node
                        if (this.root == null)   // first element being added
                            this.root = newNode; // set node as root of the tree

                        int result          = 0;
                        DeBruijnNode temp   = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            // Equal k-mer already present in the tree.
                            // NOTE(review): the body of the KmerCount check (and whatever exits
                            // the loop afterwards) is not visible in this listing.
                            if (result == 0)
                                if (temp.KmerCount <= 255)
                            else if (result > 0) // move to right sub-tree
                                parent = temp;
                                temp   = temp.Right;
                            else if (result < 0) // move to left sub-tree
                                parent = temp;
                                temp   = temp.Left;

                        // position found
                        // When result == 0 neither branch runs, so no new child is attached.
                        if (result > 0) // add as right child
                            parent.Right = newNode;
                        else if (result < 0) // add as left child
                            parent.Left = newNode;
                    } // End of tree node creation.

            // NOTE(review): First() throws InvalidOperationException on an empty input, and the
            // alphabet of the first sequence is used for the symbol tables of every sequence.
            IAlphabet alphabet = sequences.First().Alphabet;

            byte[]         symbolMap        = alphabet.GetSymbolValueMap();
            HashSet <byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
            HashSet <byte> gapSymbols;

            // NOTE(review): the return value is ignored; presumably gapSymbols is always
            // non-null afterwards, since it is dereferenced below — TODO confirm.
            alphabet.TryGetGapSymbols(out gapSymbols);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
                // if the blocking collection count is exceeding 2 million wait for 5 sec
                // so that the task can remove some kmers and create the nodes.
                // This will avoid OutOfMemoryException
                // NOTE(review): the body of this wait loop is not visible in this listing.
                while (kmerDataCollection.Count > 2000000)

                long   count            = sequence.Count;
                byte[] convertedSymbols = new byte[count];
                bool   skipSequence     = false;

                // Map every symbol through symbolMap and flag the sequence if any mapped
                // symbol is ambiguous or a gap.
                for (long index = 0; index < count; index++)
                    convertedSymbols[index] = symbolMap[sequence[index]];
                    if (ambiguousSymbols.Contains(convertedSymbols[index]) || gapSymbols.Contains(convertedSymbols[index]))
                        skipSequence = true;

                // NOTE(review): the statement guarded by this check is not visible in this
                // listing; presumably it skips the flagged sequence.
                if (skipSequence)

                Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

                // generate the kmers from each sequence
                // One k-mer per start position i in [0, count - kmerLength]; each is queued as a
                // node with an initial count of 1.
                for (long i = 0; i <= count - this.kmerLength; ++i)
                    IKmerData kmerData    = this.GetNewKmerData();
                    bool      orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                    kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));




            // Generate the links
Ejemplo n.º 3
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// Two tasks cooperate through a <c>BlockingCollection</c>: one generates k-mer nodes
        /// from the input sequences, the other inserts them into a binary search tree rooted
        /// at <c>this.root</c>. The method waits for both tasks before setting
        /// <c>GraphBuildCompleted</c>.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">Thrown when the configured k-mer length is zero, negative, or greater than 32.</exception>
        public void Build(IEnumerable <ISequence> sequences)
            // NOTE(review): this listing appears to have lost its brace-only lines and some
            // statements (loop bodies, continue/break, the ends of the task lambdas). Confirm
            // the exact structure against the original file before relying on these notes.
            if (sequences == null)
                throw new ArgumentNullException("sequences");

            if (this.kmerLength <= 0)
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);

            if (this.kmerLength > 32)
                throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan32);

            // Queue through which generated k-mer nodes reach the tree-building task.
            BlockingCollection <DeBruijnNode> kmerDataCollection = new BlockingCollection <DeBruijnNode>();

            // Producer: walks the input sequences and queues one node per k-mer.
            Task createKmers = Task.Factory.StartNew(() =>
                IAlphabet alphabet = Alphabets.DNA;

                HashSet <byte> gapSymbols;
                // NOTE(review): the return value is ignored; presumably gapSymbols is always
                // non-null afterwards, since it is enumerated below — TODO confirm.
                alphabet.TryGetGapSymbols(out gapSymbols);

                // Generate the kmers from the sequences
                foreach (ISequence sequence in sequences)
                    // if the sequence alphabet is not of type DNA then ignore it.
                    // Both counters are bumped atomically for a skipped sequence.
                    // NOTE(review): a `continue` presumably followed these increments in the
                    // original file.
                    if (sequence.Alphabet != Alphabets.DNA)
                        Interlocked.Increment(ref this.skippedSequencesCount);
                        Interlocked.Increment(ref this.processedSequencesCount);

                    // if the sequence contains any gap symbols then ignore the sequence.
                    bool skipSequence = false;
                    foreach (byte symbol in gapSymbols)
                        for (long index = 0; index < sequence.Count; ++index)
                            if (sequence[index] == symbol)
                                skipSequence = true;

                        // NOTE(review): the statement guarded by this check is not visible in
                        // this listing; presumably it leaves the gap-symbol loop early.
                        if (skipSequence)

                    if (skipSequence)
                        Interlocked.Increment(ref this.skippedSequencesCount);
                        Interlocked.Increment(ref this.processedSequencesCount);

                    // if the blocking collection count is exceeding 2 million wait for 5 sec
                    // so that the task can remove some kmers and create the nodes.
                    // This will avoid OutOfMemoryException
                    // NOTE(review): the body of this wait loop is not visible in this listing.
                    while (kmerDataCollection.Count > 2000000)

                    long count = sequence.Count;

                    // generate the kmers from each sequence
                    // One k-mer per start position i in [0, count - kmerLength]; each is queued
                    // as a node with an initial count of 1.
                    for (long i = 0; i <= count - this.kmerLength; ++i)
                        IKmerData kmerData = this.GetNewKmerData();
                        bool orientation   = kmerData.SetKmerData(sequence, i, this.kmerLength);
                        kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));

                    Interlocked.Increment(ref this.processedSequencesCount);


            // Consumer: drains the queue until it is marked complete and empty.
            // NOTE(review): no CompleteAdding() call is visible in this listing; the producer
            // presumably calls it once all sequences are processed, otherwise this loop would
            // never end — TODO confirm.
            Task buildKmers = Task.Factory.StartNew(() =>
                while (!kmerDataCollection.IsCompleted)
                    DeBruijnNode newNode = null;
                    // A timeout of -1 means wait indefinitely for an item.
                    if (kmerDataCollection.TryTake(out newNode, -1))
                        // Tree Node Creation

                        // create a new node
                        // NOTE(review): as listed, newNode is null when the search below runs
                        // right after the root is set; a `continue` was presumably dropped from
                        // this listing.
                        if (this.root == null)   // first element being added
                            this.root = newNode; // set node as root of the tree
                            newNode = null;

                        int result          = 0;
                        DeBruijnNode temp   = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            // Equal k-mer already present in the tree.
                            // NOTE(review): the body of the KmerCount check (and whatever exits
                            // the loop afterwards) is not visible in this listing.
                            if (result == 0)
                                if (temp.KmerCount <= 255)
                            else if (result > 0) // move to right sub-tree
                                parent = temp;
                                temp   = temp.Right;
                            else if (result < 0) // move to left sub-tree
                                parent = temp;
                                temp   = temp.Left;

                        // position found
                        // When result == 0 neither branch runs, so no new child is attached.
                        if (result > 0) // add as right child
                            parent.Right = newNode;
                        else if (result < 0) // add as left child
                            parent.Left = newNode;
                    } // End of tree node creation.

            // Block until both the producer and the consumer have finished.
            Task.WaitAll(createKmers, buildKmers);

            this.GraphBuildCompleted = true;

            // Generate the links