/// <summary>
/// Validates the GetSymbolValueMap method of the alphabet selected by
/// <paramref name="option"/>: two lower-case input symbols are looked up in
/// the returned map and each must resolve to the expected upper-case byte.
/// </summary>
/// <param name="option">Alphabet type whose symbol value map is validated.</param>
void ValidateGetSymbolValueMap(AlphabetsTypes option)
{
    IAlphabet alphabetInstance;
    byte inputByte1, inputByte2, outputByte1, outputByte2;

    switch (option)
    {
        case AlphabetsTypes.Protein:
            alphabetInstance = ProteinAlphabet.Instance;
            inputByte1 = (byte)'w';
            outputByte1 = (byte)'W';
            inputByte2 = (byte)'e';
            outputByte2 = (byte)'E';
            break;

        case AlphabetsTypes.Rna:
            alphabetInstance = RnaAlphabet.Instance;
            inputByte1 = (byte)'a';
            outputByte1 = (byte)'A';
            inputByte2 = (byte)'u';
            outputByte2 = (byte)'U';
            break;

        case AlphabetsTypes.Dna:
            alphabetInstance = DnaAlphabet.Instance;
            inputByte1 = (byte)'a';
            outputByte1 = (byte)'A';
            inputByte2 = (byte)'t';
            outputByte2 = (byte)'T';
            break;

        default:
            // Without this branch an unhandled alphabet type would surface as a
            // NullReferenceException on the alphabet instance; fail the test
            // with an explicit message instead.
            Assert.Fail(string.Concat(
                @"Alphabets BVT: Unsupported alphabet type ",
                option,
                " passed to ValidateGetSymbolValueMap."));
            return;
    }

    // The map is indexed by the raw symbol byte.
    byte[] queryReference = alphabetInstance.GetSymbolValueMap();

    Assert.AreEqual(outputByte1, queryReference[inputByte1]);
    Assert.AreEqual(outputByte2, queryReference[inputByte2]);

    ApplicationLog.WriteLine(string.Concat(@"Alphabets BVT: Validation of GetSymbolValueMap method for ", option, " completed successfully."));
}
/// <summary>
/// Build graph nodes and edges from list of k-mers.
/// Creates a node for every unique k-mer (and reverse-complement)
/// in the read. Then, generates adjacency information between nodes
/// by computing pairs of nodes that have overlapping regions
/// between node sequences.
/// </summary>
/// <remarks>
/// K-mers are produced on the calling thread and handed through a blocking
/// collection to a single background task, which inserts them into the
/// binary search tree rooted at <c>this.root</c>. Sequences containing an
/// ambiguous or gap symbol are skipped entirely.
/// </remarks>
/// <param name="sequences">List of input sequences.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sequences"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when the configured k-mer length is not positive.
/// </exception>
public void Build(IEnumerable<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (this.kmerLength <= 0)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthShouldBePositive);
    }

    // Resolve the alphabet details BEFORE the consumer task is started, so that
    // a failure here (e.g. First() on an empty input) cannot leave a background
    // task blocked forever on an empty collection.
    IAlphabet alphabet = sequences.First().Alphabet;
    byte[] symbolMap = alphabet.GetSymbolValueMap();
    HashSet<byte> ambiguousSymbols = alphabet.GetAmbiguousSymbols();
    HashSet<byte> gapSymbols;
    alphabet.TryGetGapSymbols(out gapSymbols);

    BlockingCollection<DeBruijnNode> kmerDataCollection = new BlockingCollection<DeBruijnNode>();

    // Consumer: takes nodes off the collection and inserts them into the tree.
    Task buildKmers = Task.Factory.StartNew(() =>
    {
        while (!kmerDataCollection.IsCompleted)
        {
            DeBruijnNode newNode = null;

            // Timeout of -1 blocks until an item arrives or adding is completed.
            if (kmerDataCollection.TryTake(out newNode, -1))
            {
                // Tree Node Creation
                if (this.root == null)
                {
                    // First element being added: it becomes the root of the tree.
                    this.root = newNode;
                    this.NodeCount++;
                    continue;
                }

                int result = 0;
                DeBruijnNode temp = this.root;
                DeBruijnNode parent = this.root;

                // Search the tree where the new node should be inserted.
                while (temp != null)
                {
                    result = newNode.NodeValue.CompareTo(temp.NodeValue);
                    if (result == 0)
                    {
                        // Duplicate k-mer: bump the occurrence count, saturating
                        // at 255, and ALWAYS stop searching. Breaking only when
                        // the count was incremented left this loop spinning on
                        // the same node once the cap was passed.
                        if (temp.KmerCount < byte.MaxValue)
                        {
                            temp.KmerCount++;
                        }

                        break;
                    }
                    else if (result > 0)
                    {
                        // Move to right sub-tree.
                        parent = temp;
                        temp = temp.Right;
                    }
                    else
                    {
                        // Move to left sub-tree.
                        parent = temp;
                        temp = temp.Left;
                    }
                }

                // Position found (result == 0 means duplicate: nothing to attach).
                if (result > 0)
                {
                    // Add as right child.
                    parent.Right = newNode;
                    NodeCount++;
                }
                else if (result < 0)
                {
                    // Add as left child.
                    parent.Left = newNode;
                    NodeCount++;
                }
            }
            // End of tree node creation.
        }
    });

    try
    {
        // Producer: generate the kmers from the sequences.
        foreach (ISequence sequence in sequences)
        {
            // If the blocking collection holds more than 2 million items, poll
            // in 5 ms sleeps so that the task can remove some kmers and create
            // the nodes. This will avoid OutOfMemoryException.
            while (kmerDataCollection.Count > 2000000)
            {
                System.Threading.Thread.Sleep(5);
            }

            long count = sequence.Count;
            byte[] convertedSymbols = new byte[count];
            bool skipSequence = false;

            for (long index = 0; index < count; index++)
            {
                convertedSymbols[index] = symbolMap[sequence[index]];

                // The result of TryGetGapSymbols is not checked above, so guard
                // against an alphabet that supplies no gap symbol set.
                if (ambiguousSymbols.Contains(convertedSymbols[index])
                    || (gapSymbols != null && gapSymbols.Contains(convertedSymbols[index])))
                {
                    skipSequence = true;
                    break;
                }
            }

            if (skipSequence)
            {
                continue;
            }

            Sequence convertedSequence = new Sequence(sequence.Alphabet, convertedSymbols, false);

            // Generate the kmers from each sequence.
            for (long i = 0; i <= count - this.kmerLength; ++i)
            {
                IKmerData kmerData = this.GetNewKmerData();
                bool orientation = kmerData.SetKmerData(convertedSequence, i, this.kmerLength);
                kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
            }
        }
    }
    finally
    {
        // Always signal completion, even when producing fails, so the consumer
        // task drains what is queued and exits instead of blocking forever.
        kmerDataCollection.CompleteAdding();

        try
        {
            Task.WaitAll(buildKmers);
        }
        finally
        {
            // Safe to dispose only once the consumer has stopped using it.
            kmerDataCollection.Dispose();
        }
    }

    // Generate the links
    this.GenerateLinks();
}