/// <summary>
/// Looks up the node stored for a k-mer by searching the bucket tree that the k-mer hashes to.
/// This method performs the search without taking the bucket lock.
/// </summary>
/// <param name="kmer">The k-mer to look up.</param>
/// <returns>Whatever the bucket tree's <c>SearchTree</c> returns for <paramref name="kmer"/>.</returns>
public DeBruijnNode TryGetOld(KmerData32 kmer)
{
    // The bucket index picks the tree; the tree performs the actual search.
    return buckets[assignBucket(kmer)].SearchTree(kmer);
}
/// <summary>
/// Looks up the node stored for a k-mer by searching the bucket tree that the k-mer hashes to.
/// This method performs the search without taking the bucket lock.
/// </summary>
/// <param name="kmer">The k-mer to look up.</param>
/// <returns>Whatever the bucket tree's <c>SearchTree</c> returns for <paramref name="kmer"/>.</returns>
public DeBruijnNode TryGetOld(KmerData32 kmer)
{
    // Select the bucket first, then delegate the search to its tree.
    BinaryTreeOfDebrujinNodes bucketTree = _buckets[AssignBucket(kmer)];
    DeBruijnNode found = bucketTree.SearchTree(kmer);
    return found;
}
/// <summary>
/// Tags graph nodes with a reference genome position. For every reference k-mer that occurs
/// at exactly one location, the matching graph node (if any) gets its
/// <c>ReferenceGenomePosition</c> set to that location. K-mers occurring at several locations
/// are skipped, and summary statistics are reported through <c>RaiseMessage</c>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// A unique reference position does not fit into a <see cref="short"/>.
/// </exception>
/// <exception cref="InvalidProgramException">
/// More than 95% of the reference k-mers were skipped for occurring at multiple locations.
/// </exception>
protected void PaintKmersWithReference()
{
    List<int> missingLocs = new List<int>();
    var refKmerPositions = SequenceToKmerBuilder.BuildKmerDictionary(ReferenceGenome.ReferenceSequence, this.KmerLength);
    int kmersPainted = 0;
    int kmersSkipped = 0;
    long totalNodes = this.Graph.NodeCount;

    foreach (var entry in refKmerPositions)
    {
        ISequence seq = entry.Key;
        IList<long> locations = entry.Value;

        // Only k-mers with a single, unambiguous reference location are painted.
        if (locations.Count != 1)
        {
            kmersSkipped += locations.Count;
            continue;
        }

        long position = locations[0];
        var kmerData = new KmerData32();
        kmerData.SetKmerData(seq, 0, this.KmerLength);

        // "false": look the node up only, do not ask for a new node to be created.
        DeBruijnNode matchingNode = this.Graph.KmerManager.SetNewOrGetOld(kmerData, false);
        if (matchingNode == null)
        {
            missingLocs.Add((int)position);
            continue;
        }

        // Validate BEFORE narrowing: an unchecked (short) cast wraps silently, so positions
        // of 65536 and above used to be stored as a wrong, small positive number.
        if (position < 0 || position > short.MaxValue)
        {
            throw new InvalidOperationException(
                "Reference position " + position.ToString() + " does not fit into a 16-bit reference genome position.");
        }

        matchingNode.ReferenceGenomePosition = (short)position;
        kmersPainted++;
    }

    // Diagnostic dump of unmatched positions; deliberately disabled via the constant "false".
    if (false && OutputDiagnosticInformation)
    {
        using (StreamWriter sw = new StreamWriter("OutMissing.csv"))
        {
            foreach (int i in missingLocs)
            {
                sw.WriteLine(i.ToString());
            }
        }
    }

    double percentKmersSkipped = 100.0 * (kmersSkipped) / ((double)(kmersPainted + kmersSkipped));
    if (percentKmersSkipped > 95.0)
    {
        throw new InvalidProgramException("Reference Genome Skipped over 95% of Kmers");
    }

    double percentHit = kmersPainted / (double)refKmerPositions.Count;
    RaiseMessage("A total of " + (100.0 * percentHit).ToString() + "% nodes in the reference were painted");
    PercentNodesPainted = 100.0 * kmersPainted / (double)totalNodes;
    RaiseMessage(PercentNodesPainted.ToString("n2") + " % of nodes painted, for a total of " + kmersPainted.ToString() + " painted.");
    RaiseMessage(percentKmersSkipped.ToString("n2") + " % of Kmers were skipped for being in multiple locations");
}
/// <summary>
/// Adds the links between the nodes of the graph. For every node, the four possible
/// right neighbours and the four possible left neighbours are built directly on the 2-bit
/// encoded k-mer value and looked up in <paramref name="kmerManager"/>; each one found is
/// registered on the node through <c>SetExtensionNode</c>.
/// </summary>
/// <param name="kmerManager">Dictionary used to look up candidate neighbour k-mers.</param>
private void GenerateLinks(KmerDictionary kmerManager)
{
    // Number of bits between the lowest base and the highest (left-most) base of the k-mer.
    int topBaseShift = 2 * (KmerLength - 1);

    // Mask that clears the two bits of the left-most base.
    ulong dropTopBaseMask = ~(((ulong)3) << topBaseShift);

    Parallel.ForEach(_nodes, node =>
    {
        KmerData32 probe = new KmerData32();

        // Right extensions: drop the left-most base, shift up by one base and
        // fill the freed low bits with each of the four base codes (0-3).
        ulong rightStem = (node.NodeValue.KmerData & dropTopBaseMask) << 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            // The boolean returned by SetKmerData is passed on as the "match is reverse complement" flag.
            bool matchIsRC = probe.SetKmerData(rightStem | baseCode, KmerLength);
            DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
            if (neighbour != null)
            {
                node.SetExtensionNode(true, matchIsRC, neighbour);
            }
        }

        // Left extensions: drop the right-most base and place each base code
        // into the left-most position.
        ulong leftStem = node.NodeValue.KmerData >> 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            bool matchIsRC = probe.SetKmerData((baseCode << topBaseShift) | leftStem, KmerLength);
            DeBruijnNode neighbour = kmerManager.TryGetOld(probe);
            if (neighbour != null)
            {
                node.SetExtensionNode(false, matchIsRC, neighbour);
            }
        }
    });

    LinkGenerationCompleted = true;
}
/// <summary>
/// Returns the node associated with the given k-mer value, adding the value to its
/// bucket tree through <c>AddOrReturnCurrent</c>.
/// The call into the bucket tree is made while holding a lock on that bucket.
/// </summary>
/// <param name="value">The k-mer value to find or add.</param>
/// <returns>The node representing this value.</returns>
public DeBruijnNode SetNewOrGetOld(KmerData32 value)
{
    BinaryTreeOfDebrujinNodes bucketTree = _buckets[AssignBucket(value)];

    // Serialize additions per bucket.
    lock (bucketTree)
    {
        return bucketTree.AddOrReturnCurrent(value);
    }
}
/// <summary>
/// Returns the node associated with the given k-mer value by delegating to the bucket
/// tree's <c>AddOrReturnCurrent</c>.
/// The call into the bucket tree is made while holding a lock on that bucket.
/// </summary>
/// <param name="value">The k-mer value to find or add.</param>
/// <param name="makeNewIfNotFound">Forwarded unchanged to <c>AddOrReturnCurrent</c>.</param>
/// <returns>The node returned by the bucket tree for this value.</returns>
public DeBruijnNode SetNewOrGetOld(KmerData32 value, bool makeNewIfNotFound = true)
{
    BinaryTreeOfDebruijnNodes bucketTree = buckets[assignBucket(value)];

    // Serialize additions per bucket.
    lock (bucketTree)
    {
        return bucketTree.AddOrReturnCurrent(value, makeNewIfNotFound);
    }
}
/// <summary>
/// Finds the node whose k-mer value equals <paramref name="value"/>, or inserts a new node
/// for it at the position given by the ordering of the encoded k-mer values.
/// </summary>
/// <param name="value">Value to find or add.</param>
/// <returns>The node already in the tree for this value, or the newly created node.</returns>
public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
{
    // Empty tree: the new node becomes the root.
    if (_root == null)
    {
        DeBruijnNode first = MakeNewNode(value);
        _root = first;
        return first;
    }

    ulong wantedKey = value.KmerData;
    DeBruijnNode current = _root;
    for (;;)
    {
        ulong currentKey = current.NodeValue.KmerData;
        if (wantedKey == currentKey)
        {
            // Key already present.
            return current;
        }

        // Smaller keys live in the left subtree, larger ones in the right subtree.
        bool descendLeft = wantedKey < currentKey;
        DeBruijnNode child = descendLeft ? current.Left : current.Right;
        if (child != null)
        {
            current = child;
            continue;
        }

        // Free slot found: attach the new node here.
        DeBruijnNode created = MakeNewNode(value);
        if (descendLeft)
        {
            current.Left = created;
        }
        else
        {
            current.Right = created;
        }

        return created;
    }
}
/// <summary>
/// Searches the tree for the node holding the given k-mer, using <c>CompareTo</c>
/// on the node values to decide the direction of descent.
/// </summary>
/// <param name="kmerValue">The k-mer value to search for.</param>
/// <returns>The matching node, or null if the search runs off the tree.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    DeBruijnNode cursor = Root;
    while (cursor != null)
    {
        int ordering = kmerValue.CompareTo(cursor.NodeValue);
        if (ordering == 0)
        {
            // Match found.
            return cursor;
        }

        // Smaller values are in the left subtree, larger ones in the right subtree.
        if (ordering < 0)
        {
            cursor = cursor.Left;
        }
        else
        {
            cursor = cursor.Right;
        }
    }

    return null;
}
/// <summary>
/// Searches the tree for the node whose encoded k-mer equals that of <paramref name="kmerValue"/>.
/// </summary>
/// <param name="kmerValue">The k-mer value to search for.</param>
/// <returns>The matching node, or null if the search runs off the tree.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    ulong wantedKey = kmerValue.KmerData;
    DeBruijnNode cursor = _root;
    while (cursor != null)
    {
        ulong cursorKey = cursor.NodeValue.KmerData;
        if (cursorKey == wantedKey)
        {
            return cursor;
        }

        // Smaller keys are in the left subtree, larger ones in the right subtree.
        if (wantedKey < cursorKey)
        {
            cursor = cursor.Left;
        }
        else
        {
            cursor = cursor.Right;
        }
    }

    return null;
}
/// <summary>
/// Searches the tree for the node whose encoded k-mer equals that of <paramref name="kmerValue"/>.
/// </summary>
/// <param name="kmerValue">The k-mer value to search for.</param>
/// <returns>The matching node, or null if the search runs off the tree.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    ulong wantedKey = kmerValue.KmerData;

    // Walk down from the root: left for smaller keys, right for larger ones,
    // stopping on an exact match or when there is no child to descend into.
    DeBruijnNode cursor = this.root;
    while (cursor != null && cursor.NodeValue.KmerData != wantedKey)
    {
        cursor = wantedKey < cursor.NodeValue.KmerData ? cursor.Left : cursor.Right;
    }

    return cursor;
}
/// <summary>
/// Removes "pathological" nodes from the graph: nodes whose k-mer equals the first k-mer of an
/// all-'A' or an all-'G' sequence of length k, and nodes that report <c>ContainsSelfReference</c>.
/// Such nodes are marked for deletion, every node then drops its marked extensions, and
/// finally the marked nodes are removed from the graph.
/// </summary>
/// <param name="graph">De Bruijn Graph.</param>
/// <returns>The number of nodes that were marked for deletion.</returns>
public static int RemovePathologicalNodes(DeBruijnGraph graph)
{
    DeBruijnGraph.ValidateGraph(graph);

    // Encoded homopolymer k-mers (presumably these also stand for their reverse
    // complements, poly-T and poly-C - verify against KmerData32).
    byte[] polyABytes = Enumerable.Repeat((byte)'A', graph.KmerLength).ToArray();
    var polyASequence = new Bio.Sequence(Bio.Alphabets.DNA, polyABytes, false);
    var polyAKmer = KmerData32.GetKmers(polyASequence, graph.KmerLength).First().KmerData;

    byte[] polyGBytes = Enumerable.Repeat((byte)'G', graph.KmerLength).ToArray();
    var polyGSequence = new Bio.Sequence(Bio.Alphabets.DNA, polyGBytes, false);
    var polyGKmer = KmerData32.GetKmers(polyGSequence, graph.KmerLength).First().KmerData;

    // Pass 1: mark the offending nodes and count them.
    int markedCount = 0;
    foreach (var candidate in graph.GetNodes())
    {
        var encoded = candidate.NodeValue.KmerData;
        bool isHomopolymer = encoded == polyAKmer || encoded == polyGKmer;
        if (!isHomopolymer && !candidate.ContainsSelfReference)
        {
            continue;
        }

        candidate.MarkNodeForDelete();
        markedCount++;
    }

    // Pass 2: let every node drop its links to marked nodes.
    foreach (var node in graph.GetNodes())
    {
        node.RemoveMarkedExtensions();
    }

    // Pass 3: remove the marked nodes themselves.
    graph.RemoveMarkedNodes();
    return markedCount;
}
/// <summary>
/// Creates a node for a k-mer with an initial count of zero and
/// bumps this tree's <c>Count</c> by one.
/// </summary>
/// <param name="value">Kmer to make node with</param>
/// <returns>The newly created node.</returns>
private DeBruijnNode makeNewNode(KmerData32 value)
{
    // Count is updated before the node is constructed.
    Count += 1;
    DeBruijnNode created = new DeBruijnNode(value, 0);
    return created;
}
/// <summary>
/// Maps a k-mer to a bucket by masking its encoded value with the hashing mask.
/// </summary>
/// <param name="value">kmer value</param>
/// <returns>bucket index</returns>
private int AssignBucket(KmerData32 value)
{
    // Keep only the bits selected by the mask; those bits are the bucket index.
    var maskedBits = value.KmerData & _hashingMask;
    return (int)maskedBits;
}
/// <summary>
/// Build graph nodes and edges from list of k-mers.
/// Creates a node for every unique k-mer (and reverse-complement)
/// in the read. Then, generates adjacency information between nodes
/// by computing pairs of nodes that have overlapping regions
/// between node sequences.
/// A producer task converts the sequences into blocks of k-mers while the calling
/// thread consumes those blocks in parallel and inserts them into a k-mer dictionary.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <param name="destroyKmerManagerAfterwards">
/// MT Assembler specific flag. When true, the k-mer dictionary is dropped after link
/// generation and the Left/Right tree pointers of all nodes are cleared; when false,
/// the dictionary is kept in <c>KmerManager</c>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
public void Build(IEnumerable<ISequence> sequences, bool destroyKmerManagerAfterwards = true)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    // Build the dictionary of kmers to debruijin nodes
    var kmerManager = new KmerDictionary();
    var kmerDataCollection = new BlockingCollection<List<KmerData32>>();

    // Create the producer task
    Task theProducer = Task.Factory.StartNew(() =>
    {
        Thread.BeginCriticalRegion();
        try
        {
#if DEBUG
            int parsedReads = 0;
#endif
            var kmerList = new List<KmerData32>(BlockSize);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
            {
#if DEBUG
                parsedReads++;
                if (parsedReads % 50000 == 0)
                {
                    Console.WriteLine("Parsed: " + parsedReads.ToString() + " reads");
                }
#endif
                // Ignore sequences that are not gap-free DNA or that are shorter than one k-mer.
                if (sequence.Alphabet != Alphabets.NoGapDNA || sequence.Count < _kmerLength)
                {
                    Interlocked.Increment(ref this._skippedSequencesCount);
                    Interlocked.Increment(ref this._processedSequencesCount);
                    continue;
                }

                // Back-pressure: while too many blocks are queued, pause briefly (2 ms per
                // poll) so the consumers can drain the queue. This avoids OutOfMemoryException.
                while (kmerDataCollection.Count > StopAddThreshold)
                {
                    Thread.Sleep(2);
                }

                // Convert sequences to k-mers
                var kmers = KmerData32.GetKmers(sequence, this.KmerLength);
                kmerList.AddRange(kmers);

                // Most reads are <=150 basepairs, so this should avoid having to grow the list
                // by keeping it below blockSize
                if (kmerList.Count > AddThreshold)
                {
                    kmerDataCollection.Add(kmerList);
                    kmerList = new List<KmerData32>(BlockSize);
                }

                Interlocked.Increment(ref this._processedSequencesCount);
            }

            // Hand over the last, partially filled block (skip it when nothing is left).
            if (kmerList.Count > 0)
            {
                kmerDataCollection.Add(kmerList);
            }
        }
        finally
        {
            kmerDataCollection.CompleteAdding();

            // Balance the single BeginCriticalRegion above exactly once, on every exit path.
            Thread.EndCriticalRegion();
        }
    });

    if (true) // (!Bio.CrossPlatform.Environment.RunningInMono)
    {
        // Consume k-mers by adding them to binary tree structure as nodes
        Parallel.ForEach(
            kmerDataCollection.GetConsumingEnumerable(),
            new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
            newKmerList =>
            {
                foreach (KmerData32 newKmer in newKmerList)
                {
                    // Create Vertex
                    DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);
                    Debug.Assert(newKmer.KmerData == node.NodeValue.KmerData);
                }
            });
    }
    else
    {
        // Sequential fallback, currently disabled by the constant condition above.
        foreach (var newKmerList in kmerDataCollection.GetConsumingEnumerable())
        {
            foreach (KmerData32 newKmer in newKmerList)
            {
                // Create Vertex
                DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);
                Debug.Assert(newKmer.KmerData == node.NodeValue.KmerData);
            }
        }
    }

    // Done filling binary tree
    theProducer.Wait(); // Make sure task is finished - also rethrows any exception here.
    kmerDataCollection.Dispose();

    // NOTE: To speed enumeration make the nodes into an array and dispose of the collection
    this._nodeCount = kmerManager.NodeCount;
    this._nodes = kmerManager.GenerateNodeArray();

    // Generate the links
    this.GenerateLinks(kmerManager);

    if (destroyKmerManagerAfterwards)
    {
        // Since we no longer need to search for values delete tree structure, also set left and
        // right nodes of child array to null so that they are available for GC if no longer needed
        kmerManager = null;
        foreach (DeBruijnNode node in _nodes)
        {
            node.Left = null;
            node.Right = null;
        }
    }
    else
    {
        KmerManager = kmerManager;
    }

    this.GraphBuildCompleted = true;
}
/// <summary>
/// Maps a k-mer to a bucket by masking its encoded value with the hashing mask.
/// </summary>
/// <param name="value">kmer value</param>
/// <returns>bucket index</returns>
private int assignBucket(KmerData32 value)
{
    // Kept as a separate method for readability; the original author expected the JIT to inline it.
    var maskedBits = value.KmerData & hashingMask;
    return (int)maskedBits;
}
/// <summary>
/// Adds the links between the nodes of the graph. For every node the k-mer and its reverse
/// complement are materialised as strings; each possible one-symbol extension to the right
/// and to the left is then searched in the tree, first as written and, if that fails, as
/// its reverse complement. Every hit is registered on the node through <c>SetExtensionNode</c>.
/// </summary>
private void GenerateLinks()
{
    Parallel.ForEach(GetNodes(), node =>
    {
        KmerData32 probe = new KmerData32();

        // Decode the stored k-mer and its reverse complement; which of the two is the
        // "forward" string depends on the node's orientation flag.
        bool storedIsForward = node.NodeDataOrientation;
        string stored = Encoding.Default.GetString(node.NodeValue.GetKmerData(KmerLength));
        string storedRc = Encoding.Default.GetString(node.NodeValue.GetReverseComplementOfKmerData(KmerLength));
        string forward = storedIsForward ? stored : storedRc;
        string reverse = storedIsForward ? storedRc : stored;

        // Right extensions: forward k-mer without its first symbol plus one new symbol.
        string rightStem = forward.Substring(1);
        string rightStemRc = reverse.Substring(0, KmerLength - 1);
        for (int s = 0; s < _dnaSymbols.Length; s++)
        {
            probe.SetKmerData(Encoding.Default.GetBytes(rightStem + _dnaSymbols[s]), KmerLength);
            DeBruijnNode hit = SearchTree(probe);
            if (hit != null)
            {
                node.SetExtensionNode(true, hit.NodeDataOrientation, hit);
                continue;
            }

            // Not found as written: try the reverse complement of the same extension.
            probe.SetKmerData(Encoding.Default.GetBytes(_dnaSymbolsComplement[s] + rightStemRc), KmerLength);
            hit = SearchTree(probe);
            if (hit != null)
            {
                node.SetExtensionNode(true, !hit.NodeDataOrientation, hit);
            }
        }

        // Left extensions: one new symbol plus the forward k-mer without its last symbol.
        string leftStem = forward.Substring(0, KmerLength - 1);
        string leftStemRc = reverse.Substring(1);
        for (int s = 0; s < _dnaSymbols.Length; s++)
        {
            probe.SetKmerData(Encoding.Default.GetBytes(_dnaSymbols[s] + leftStem), KmerLength);
            DeBruijnNode hit = SearchTree(probe);
            if (hit != null)
            {
                node.SetExtensionNode(false, hit.NodeDataOrientation, hit);
                continue;
            }

            // Not found as written: try the reverse complement of the same extension.
            probe.SetKmerData(Encoding.Default.GetBytes(leftStemRc + _dnaSymbolsComplement[s]), KmerLength);
            hit = SearchTree(probe);
            if (hit != null)
            {
                node.SetExtensionNode(false, !hit.NodeDataOrientation, hit);
            }
        }
    });

    LinkGenerationCompleted = true;
}
/// <summary>
/// Finds the node for the given k-mer value and, if it is absent, optionally inserts a new one.
/// Whenever a node is returned, its <c>KmerCount</c> is incremented (saturating at
/// <see cref="UInt32.MaxValue"/>), so a freshly created node ends up with a count of 1.
/// Useful when two values that are equal by comparison are not equal by reference.
/// </summary>
/// <param name="value">Value to find or add.</param>
/// <param name="makeNewIfNotFound">
/// When true (the default) a missing value is inserted; when false the tree is left
/// unchanged and null is returned for a missing value.
/// </param>
/// <returns>The node found or added, or null if the value is absent and was not added.</returns>
public DeBruijnNode AddOrReturnCurrent(KmerData32 value, bool makeNewIfNotFound = true)
{
    DeBruijnNode toReturn = null;
    if (this.root == null)
    {
        // Empty tree: only create the root when the caller asked for insertion.
        // (Previously a node was created here even for lookup-only calls.)
        if (makeNewIfNotFound)
        {
            toReturn = makeNewNode(value);
            this.root = toReturn;
        }
    }
    else
    {
        ulong newKey = value.KmerData;
        DeBruijnNode node = this.root;
        while (true)
        {
            ulong currentKey = node.NodeValue.KmerData;
            if (currentKey == newKey)
            {
                // Key already exists.
                toReturn = node;
                break;
            }

            if (newKey < currentKey)
            {
                // Go to left.
                if (node.Left == null)
                {
                    if (makeNewIfNotFound)
                    {
                        toReturn = makeNewNode(value);
                        node.Left = toReturn;
                    }

                    break;
                }

                node = node.Left;
            }
            else
            {
                // Go to right.
                if (node.Right == null)
                {
                    if (makeNewIfNotFound)
                    {
                        toReturn = makeNewNode(value);
                        node.Right = toReturn;
                    }

                    break;
                }

                node = node.Right;
            }
        }
    }

    // NOTE(review): the count is also bumped for lookup-only calls (makeNewIfNotFound == false)
    // that hit an existing node - confirm that callers rely on this before changing it.
    if (toReturn != null && toReturn.KmerCount < UInt32.MaxValue)
    {
        toReturn.KmerCount++;
    }

    return toReturn;
}
/// <summary>
/// Builds the graph nodes and links from the k-mers of the given sequences.
/// A producer task converts the sequences into blocks of k-mers; the calling thread
/// consumes those blocks in parallel, inserts the k-mers into a k-mer dictionary and
/// counts their occurrences. Afterwards the nodes are copied into an array, the links
/// are generated and the Left/Right tree pointers of all nodes are cleared.
/// Sequences that are not DNA or that contain gap symbols are skipped.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">The k-mer length exceeds <c>KmerData32.MAX_KMER_LENGTH</c>.</exception>
public void Build(IEnumerable<ISequence> sequences)
{
    // Size of Kmer List to grab, somewhat arbitrary but want to keep list size below large object threshold, which is ~85 kb
    const int blockSize = 4096;

    // When to add list to blocking collection, most short reads are <=151 bp so this should avoid needing to grow the list
    const int addThreshold = blockSize - 151;

    // When to pause adding
    const int stopAddThreshold = 2000000 / blockSize;

    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (KmerLength > KmerData32.MAX_KMER_LENGTH)
    {
        throw new ArgumentException(Properties.Resource.KmerLengthGreaterThan31);
    }

    // A dictionary kmers to debruijin nodes
    KmerDictionary kmerManager = new KmerDictionary();

    // Create the producer thread.
    var kmerDataCollection = new BlockingCollection<List<KmerData32>>();
    Task producer = Task.Factory.StartNew(() =>
    {
        try
        {
            List<KmerData32> kmerList = new List<KmerData32>(blockSize);

            IAlphabet alphabet = Alphabets.DNA;
            HashSet<byte> gapSymbols;
            alphabet.TryGetGapSymbols(out gapSymbols);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
            {
                // if the sequence alphabet is not of type DNA then ignore it.
                bool skipSequence = false;
                if (sequence.Alphabet != Alphabets.DNA)
                {
                    skipSequence = true;
                }
                else
                {
                    // if the sequence contains any gap symbols then ignore the sequence.
                    foreach (byte symbol in gapSymbols)
                    {
                        for (long index = 0; index < sequence.Count; ++index)
                        {
                            if (sequence[index] == symbol)
                            {
                                skipSequence = true;
                                break;
                            }
                        }

                        if (skipSequence)
                        {
                            break;
                        }
                    }
                }

                if (skipSequence)
                {
                    Interlocked.Increment(ref _skippedSequencesCount);
                    Interlocked.Increment(ref _processedSequencesCount);
                    continue;
                }

                // Back-pressure: while the queue holds roughly 2 million k-mers or more, wait
                // 2 seconds per poll so the consumers can drain it. Avoids OutOfMemoryException.
                while (kmerDataCollection.Count > stopAddThreshold)
                {
                    Task.Delay(TimeSpan.FromSeconds(2)).Wait();
                }

                // Convert sequences to k-mers
                kmerList.AddRange(KmerData32.GetKmers(sequence, KmerLength));

                // Most reads are <=150 basepairs, so this should avoid having to grow the list
                // by keeping it below blockSize
                if (kmerList.Count > addThreshold)
                {
                    kmerDataCollection.Add(kmerList);
                    kmerList = new List<KmerData32>(blockSize);
                }

                Interlocked.Increment(ref _processedSequencesCount);
            }

            // Hand over the last, partially filled block (skip it when nothing is left).
            if (kmerList.Count > 0)
            {
                kmerDataCollection.Add(kmerList);
            }
        }
        finally
        {
            kmerDataCollection.CompleteAdding();
        }
    });

    // Consume k-mers by adding them to binary tree structure as nodes
    Parallel.ForEach(kmerDataCollection.GetConsumingEnumerable(), newKmerList =>
    {
        foreach (KmerData32 newKmer in newKmerList)
        {
            // Create Vertex
            DeBruijnNode node = kmerManager.SetNewOrGetOld(newKmer);

            // Saturating count. The unlocked test is only a cheap pre-check; the decision is
            // repeated under the lock so two threads cannot both increment past the cap.
            // The cap is byte.MaxValue so that a byte-sized counter cannot wrap around to 0.
            // NOTE(review): assumes KmerCount is (at least) byte-sized, as the node
            // constructor's "byte count" parameter suggests - confirm.
            if (node.KmerCount < byte.MaxValue)
            {
                lock (node)
                {
                    if (node.KmerCount < byte.MaxValue)
                    {
                        node.KmerCount++;
                    }
                }
            }
        }
    });

    // Ensure producer exceptions are handled.
    producer.Wait();

    // Done filling binary tree
    kmerDataCollection.Dispose();

    // NOTE: To speed enumeration make the nodes into an array and dispose of the collection
    _nodeCount = kmerManager.NodeCount;
    _nodes = kmerManager.GenerateNodeArray();

    // Generate the links
    GenerateLinks(kmerManager);

    // Since we no longer need to search for values set left and right nodes of child array to null
    // so that they are available for GC if no longer needed
    foreach (DeBruijnNode node in _nodes)
    {
        node.Left = node.Right = null;
    }

    GraphBuildCompleted = true;
}
/// <summary>
/// Creates a node for the given k-mer with the given initial k-mer count.
/// </summary>
/// <param name="value">The k-mer stored in <c>NodeValue</c>.</param>
/// <param name="count">The initial value of <c>KmerCount</c>.</param>
public DeBruijnNode(KmerData32 value, byte count)
{
    NodeValue = value;
    KmerCount = count;
}
/// <summary>
/// Build graph nodes and edges from list of k-mers.
/// Creates a node for every unique k-mer (and reverse-complement)
/// in the read. Then, generates adjacency information between nodes
/// by computing pairs of nodes that have overlapping regions
/// between node sequences.
/// A producer task turns the sequences into candidate nodes while the calling thread
/// inserts them into the binary tree rooted at <c>Root</c>, counting duplicates.
/// Sequences that are not DNA or that contain gap symbols are skipped.
/// </summary>
/// <param name="sequences">List of input sequences.</param>
/// <exception cref="ArgumentNullException"><paramref name="sequences"/> is null.</exception>
/// <exception cref="ArgumentException">The k-mer length is not positive or exceeds the maximum.</exception>
/// <exception cref="AggregateException">The producer task failed while reading the sequences.</exception>
public void Build(IEnumerable<ISequence> sequences)
{
    if (sequences == null)
    {
        throw new ArgumentNullException("sequences");
    }

    if (KmerLength <= 0)
    {
        throw new ArgumentException("KmerLengthShouldBePositive");
    }

    if (KmerLength > MaxKmerLength)
    {
        throw new ArgumentException("KmerLengthGreaterThan32");
    }

    var kmerDataCollection = new BlockingCollection<DeBruijnNode>();

    // Keep a handle on the producer so that its failures can be observed below.
    Task producer = Task.Factory.StartNew(() =>
    {
        try
        {
            IAlphabet alphabet = Alphabets.DNA;
            HashSet<byte> gapSymbols;
            alphabet.TryGetGapSymbols(out gapSymbols);

            // Generate the kmers from the sequences
            foreach (ISequence sequence in sequences)
            {
                // if the sequence alphabet is not of type DNA then ignore it.
                bool skipSequence = sequence.Alphabet != Alphabets.DNA;
                if (!skipSequence)
                {
                    // if the sequence contains any gap symbols then ignore the sequence.
                    foreach (byte symbol in gapSymbols)
                    {
                        for (long index = 0; index < sequence.Count; ++index)
                        {
                            if (sequence[index] == symbol)
                            {
                                skipSequence = true;
                                break;
                            }
                        }

                        if (skipSequence)
                        {
                            break;
                        }
                    }
                }

                if (skipSequence)
                {
                    Interlocked.Increment(ref _skippedSequencesCount);
                    Interlocked.Increment(ref _processedSequencesCount);
                    continue;
                }

                // Back-pressure: while too many nodes are queued, pause briefly (5 ms per
                // poll) so the consumer can drain the queue. This avoids OutOfMemoryException.
                while (kmerDataCollection.Count > StopAddThreshold)
                {
                    Thread.Sleep(5);
                }

                // Generate the kmers from each sequence
                long count = sequence.Count;
                for (long i = 0; i <= count - KmerLength; ++i)
                {
                    var kmerData = new KmerData32();
                    bool orientation = kmerData.SetKmerData(sequence, i, KmerLength);
                    kmerDataCollection.Add(new DeBruijnNode(kmerData, orientation, 1));
                }

                Interlocked.Increment(ref _processedSequencesCount);
            }
        }
        finally
        {
            kmerDataCollection.CompleteAdding();
        }
    });

    // The main thread will then process all the data - this will loop until the above
    // task completes adding the kmers.
    foreach (var newNode in kmerDataCollection.GetConsumingEnumerable())
    {
        if (Root == null)
        {
            // First element being added becomes the root of the tree.
            Root = newNode;
            NodeCount++;
            continue;
        }

        // Walk down the tree until the k-mer is found or a free slot is reached.
        DeBruijnNode current = Root;
        while (true)
        {
            int result = newNode.NodeValue.CompareTo(current.NodeValue);
            if (result == 0)
            {
                // Duplicate k-mer: bump the saturating counter and ALWAYS stop searching.
                // (Previously the loop only exited when the counter was below the cap, so a
                // saturated node made this loop spin forever.)
                // NOTE(review): cap chosen as byte.MaxValue so that a byte-sized counter
                // cannot wrap around - confirm the declared type of KmerCount.
                if (current.KmerCount < byte.MaxValue)
                {
                    current.KmerCount++;
                }

                break;
            }

            if (result > 0)
            {
                // Larger values go to the right sub-tree.
                if (current.Right == null)
                {
                    current.Right = newNode;
                    NodeCount++;
                    break;
                }

                current = current.Right;
            }
            else
            {
                // Smaller values go to the left sub-tree.
                if (current.Left == null)
                {
                    current.Left = newNode;
                    NodeCount++;
                    break;
                }

                current = current.Left;
            }
        }
    }

    // The producer has completed adding at this point; Wait() rethrows anything it threw
    // so that a partially read input is not silently turned into a graph.
    producer.Wait();

    // Done adding - we can throw away the kmer collection as we now have the graph
    kmerDataCollection.Dispose();
    this.GraphBuildCompleted = true;

    // Generate the links
    this.GenerateLinks();
}