Пример #1
0
        /// <summary>
        /// Validates the GetSymbolValueMap method of the alphabet selected by
        /// <paramref name="option"/>: two lower-case symbols are looked up in the
        /// returned map and must resolve to their upper-case byte values.
        /// </summary>
        /// <param name="option">Alphabet type whose symbol value map is validated.</param>
        void ValidateGetSymbolValueMap(AlphabetsTypes option)
        {
            IAlphabet alphabetInstance;
            byte      inputByte1, inputByte2, outputByte1, outputByte2;

            switch (option)
            {
            case AlphabetsTypes.Protein:
                alphabetInstance = ProteinAlphabet.Instance;
                inputByte1       = (byte)'w';
                outputByte1      = (byte)'W';
                inputByte2       = (byte)'e';
                outputByte2      = (byte)'E';
                break;

            case AlphabetsTypes.Rna:
                alphabetInstance = RnaAlphabet.Instance;
                inputByte1       = (byte)'a';
                outputByte1      = (byte)'A';
                inputByte2       = (byte)'u';
                outputByte2      = (byte)'U';
                break;

            case AlphabetsTypes.Dna:
                alphabetInstance = DnaAlphabet.Instance;
                inputByte1       = (byte)'a';
                outputByte1      = (byte)'A';
                inputByte2       = (byte)'t';
                outputByte2      = (byte)'T';
                break;

            default:
                // Fail with a readable message rather than a NullReferenceException
                // on the alphabet instance when an unhandled alphabet type is passed.
                Assert.Fail(string.Concat("Alphabets BVT: Unsupported alphabet type ", option, "."));
                return;
            }

            // The map is indexed by the raw symbol byte.
            byte[] queryReference = alphabetInstance.GetSymbolValueMap();

            Assert.AreEqual(outputByte1, queryReference[inputByte1]);
            Assert.AreEqual(outputByte2, queryReference[inputByte2]);

            // Single-line message: the previous verbatim literal embedded a line break
            // and the source indentation into the log output.
            ApplicationLog.WriteLine(string.Concat(
                "Alphabets BVT: Validation of GetSymbolValueMap method for ", option, " completed successfully."));
        }
Пример #2
0
        /// <summary>
        /// Build graph nodes and edges from list of k-mers.
        /// Creates a node for every unique k-mer (and reverse-complement)
        /// in the read. Then, generates adjacency information between nodes
        /// by computing pairs of nodes that have overlapping regions
        /// between node sequences.
        /// </summary>
        /// <remarks>
        /// K-mers are generated on the calling thread and handed through a blocking
        /// collection to a single background task, which inserts them into a binary
        /// tree ordered by node value. A sequence that contains any ambiguous or gap
        /// symbol is skipped entirely. The alphabet of the first sequence supplies the
        /// symbol map used for every sequence.
        /// </remarks>
        /// <param name="sequences">List of input sequences.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
        /// <exception cref="ArgumentException">The configured k-mer length is not positive.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="sequences"/> is empty.</exception>
        public void Build(IEnumerable <ISequence> sequences)
        {
            if (sequences == null)
            {
                throw new ArgumentNullException("sequences");
            }

            if (this.kmerLength <= 0)
            {
                throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
            }

            // Resolve the alphabet data BEFORE starting the consumer task: First() throws
            // on an empty input, and throwing after the task was started would leave it
            // blocked forever waiting on a collection that is never completed.
            IAlphabet alphabet = sequences.First().Alphabet;

            byte[]         symbolMap        = alphabet.GetSymbolValueMap();
            HashSet <byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
            HashSet <byte> gapSymbols;

            // The result of the Try-call is not relied upon; gapSymbols is null-checked
            // where it is used in case the alphabet did not provide a set.
            alphabet.TryGetGapSymbols(out gapSymbols);

            BlockingCollection <DeBruijnNode> kmerDataCollection = new BlockingCollection <DeBruijnNode>();

            // Consumer: takes nodes off the collection and inserts them into the tree.
            Task buildKmers = Task.Factory.StartNew(() =>
            {
                while (!kmerDataCollection.IsCompleted)
                {
                    DeBruijnNode newNode = null;

                    // Blocks (timeout -1) until an item is available or adding is completed.
                    if (kmerDataCollection.TryTake(out newNode, -1))
                    {
                        // Tree Node Creation

                        if (this.root == null)   // first element being added
                        {
                            this.root = newNode; // set node as root of the tree
                            this.NodeCount++;
                            continue;
                        }

                        int result          = 0;
                        DeBruijnNode temp   = this.root;
                        DeBruijnNode parent = this.root;

                        // Search the tree where the new node should be inserted
                        while (temp != null)
                        {
                            result = newNode.NodeValue.CompareTo(temp.NodeValue);
                            if (result == 0)
                            {
                                // Duplicate k-mer: bump the count, saturating at 255.
                                // Always leave the search here. Previously the break was
                                // skipped once the limit check failed, which spun this loop
                                // forever; with a byte-sized counter the old "<= 255" check
                                // could never fail and the increment wrapped 255 back to 0.
                                if (temp.KmerCount < 255)
                                {
                                    temp.KmerCount++;
                                }

                                break;
                            }
                            else if (result > 0) // move to right sub-tree
                            {
                                parent = temp;
                                temp   = temp.Right;
                            }
                            else // result < 0: move to left sub-tree
                            {
                                parent = temp;
                                temp   = temp.Left;
                            }
                        }

                        // position found (result == 0 means duplicate, nothing to attach)
                        if (result > 0) // add as right child
                        {
                            parent.Right = newNode;
                            this.NodeCount++;
                        }
                        else if (result < 0) // add as left child
                        {
                            parent.Left = newNode;
                            this.NodeCount++;
                        }
                    } // End of tree node creation.
                }
            });

            try
            {
                // Generate the kmers from the sequences
                foreach (ISequence sequence in sequences)
                {
                    // If more than 2 million nodes are pending, poll in 5 millisecond
                    // sleeps so that the task can remove some kmers and create the nodes.
                    // This will avoid OutOfMemoryException.
                    while (kmerDataCollection.Count > 2000000)
                    {
                        System.Threading.Thread.Sleep(5);
                    }

                    long   count            = sequence.Count;
                    byte[] convertedSymbols = new byte[count];
                    bool   skipSequence     = false;

                    for (long index = 0; index < count; index++)
                    {
                        byte symbol = symbolMap[sequence[index]];
                        convertedSymbols[index] = symbol;

                        // A single ambiguous or gap symbol disqualifies the whole sequence.
                        if (ambiguousSymbols.Contains(symbol) || (gapSymbols != null && gapSymbols.Contains(symbol)))
                        {
                            skipSequence = true;
                            break;
                        }
                    }

                    if (skipSequence)
                    {
                        continue;
                    }

                    Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

                    // generate the kmers from each sequence
                    for (long i = 0; i <= count - this.kmerLength; ++i)
                    {
                        IKmerData kmerData    = this.GetNewKmerData();
                        bool      orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                        kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                    }
                }
            }
            finally
            {
                // Always signal completion, even when k-mer generation throws; otherwise
                // the consumer task would stay blocked in TryTake indefinitely.
                kmerDataCollection.CompleteAdding();
            }

            Task.WaitAll(buildKmers);

            // Dispose only after the consumer has finished with the collection.
            kmerDataCollection.Dispose();

            // Generate the links
            this.GenerateLinks();
        }