/// <summary>
/// Returns the node stored for the given k-mer, inserting a new node first
/// when the k-mer is not yet present in its bucket.
/// The bucket's tree is locked for the duration of the lookup/insert, so
/// concurrent callers that map to the same bucket are serialized.
/// </summary>
/// <param name="value">K-mer to look up or insert.</param>
/// <returns>The node representing this value.</returns>
public DeBruijnNode SetNewOrGetOld(KmerData32 value)
{
    BinaryTreeOfDebrujinNodes targetTree = _buckets[AssignBucket(value)];

    // Insertion mutates the tree, so only one thread may work on a bucket at a time.
    lock (targetTree)
    {
        return targetTree.AddOrReturnCurrent(value);
    }
}
/// <summary>
/// Looks up the k-mer in this binary tree and inserts a new node for it
/// when no node with the same encoded key exists yet.
/// Nodes are ordered by the unsigned k-mer encoding: smaller keys go to the
/// left child, larger keys to the right child.
/// </summary>
/// <param name="value">K-mer to add.</param>
/// <returns>
/// The node already holding an equal key, or the newly created node when
/// the key was not present.
/// </returns>
public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
{
    // Empty tree: the new node becomes the root.
    if (_root == null)
    {
        DeBruijnNode first = MakeNewNode(value);
        _root = first;
        return first;
    }

    ulong key = value.KmerData;
    DeBruijnNode cursor = _root;

    for (;;)
    {
        ulong cursorKey = cursor.NodeValue.KmerData;
        if (cursorKey == key)
        {
            // Key already exists; hand back the stored node.
            return cursor;
        }

        bool descendLeft = key < cursorKey;
        DeBruijnNode child = descendLeft ? cursor.Left : cursor.Right;

        if (child == null)
        {
            // Reached a free slot on the search path: attach the new node here.
            DeBruijnNode created = MakeNewNode(value);
            if (descendLeft)
            {
                cursor.Left = created;
            }
            else
            {
                cursor.Right = created;
            }

            return created;
        }

        cursor = child;
    }
}
/// <summary>
/// Connects every node of the graph to its neighbours.
/// For each node the four possible right-hand and four possible left-hand
/// neighbouring k-mers are built by bit manipulation of the 2-bit-per-base
/// encoding, looked up in <paramref name="kmerManager"/>, and registered on
/// the node when found. Nodes are processed in parallel.
/// </summary>
/// <param name="kmerManager">Dictionary used to find the node for a k-mer.</param>
private void GenerateLinks(KmerDictionary kmerManager)
{
    // Bit offset of the left-most (highest) base in the encoded k-mer.
    int highBaseShift = 2 * (KmerLength - 1);

    // Mask that clears the two bits of the left-most base.
    ulong dropHighBaseMask = ~(3UL << highBaseShift);

    Parallel.ForEach(_nodes, node =>
    {
        // Scratch k-mer reused for every lookup made on behalf of this node.
        KmerData32 probe = new KmerData32();

        // Right extensions: drop the left-most base, then shift left by one
        // base to open a slot at the right end (e.g. "ACGTA" -> "CGTA" + N).
        ulong rightStem = (node.NodeValue.KmerData & dropHighBaseMask) << 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            // NOTE(review): per the original author's comment, SetKmerData returns
            // whether the reverse complement was stored instead of the k-mer
            // itself - confirm against KmerData32.SetKmerData.
            bool storedAsReverseComplement = probe.SetKmerData(rightStem | baseCode, KmerLength);
            DeBruijnNode rightNeighbour = kmerManager.TryGetOld(probe);
            if (rightNeighbour != null)
            {
                node.SetExtensionNode(true, storedAsReverseComplement, rightNeighbour);
            }
        }

        // Left extensions: drop the right-most base, then place each candidate
        // base in the now-free left-most slot (e.g. N + "ACGA").
        ulong leftStem = node.NodeValue.KmerData >> 2;
        for (ulong baseCode = 0; baseCode < 4; baseCode++)
        {
            bool storedAsReverseComplement = probe.SetKmerData((baseCode << highBaseShift) | leftStem, KmerLength);
            DeBruijnNode leftNeighbour = kmerManager.TryGetOld(probe);
            if (leftNeighbour != null)
            {
                node.SetExtensionNode(false, storedAsReverseComplement, leftNeighbour);
            }
        }
    });

    LinkGenerationCompleted = true;
}
/// <summary>
/// Orders this k-mer relative to another object by comparing the underlying
/// encoded k-mer values.
/// </summary>
/// <param name="obj">Object to compare with; it is cast to KmerData32.</param>
/// <returns>
/// A negative number when this instance sorts before <paramref name="obj"/>,
/// zero when both hold the same encoded value, and a positive number when
/// this instance sorts after it.
/// </returns>
public int CompareTo(object obj)
{
    // The cast fails for anything that is not a KmerData32.
    KmerData32 other = (KmerData32)obj;
    return kmerData.CompareTo(other.kmerData);
}
/// <summary>
/// Orders this k-mer relative to another KmerData32 by comparing the
/// underlying encoded k-mer values.
/// </summary>
/// <param name="kmer">K-mer to compare with.</param>
/// <returns>
/// A negative number when this instance sorts before <paramref name="kmer"/>,
/// zero when both hold the same encoded value, and a positive number when
/// this instance sorts after it.
/// </returns>
public int CompareTo(KmerData32 kmer)
{
    int ordering = kmerData.CompareTo(kmer.kmerData);
    return ordering;
}
/// <summary>
/// Orders this k-mer relative to another object by comparing the encoded
/// values exposed through <c>KmerData</c>.
/// </summary>
/// <param name="value">Object to compare with; it is cast to KmerData32.</param>
/// <returns>
/// A negative number when this instance sorts before <paramref name="value"/>,
/// zero when both hold the same encoded value, and a positive number when
/// this instance sorts after it.
/// </returns>
public int CompareTo(object value)
{
    // The cast fails for anything that is not a KmerData32.
    KmerData32 other = (KmerData32)value;
    return KmerData.CompareTo(other.KmerData);
}
/// <summary>
/// Orders this k-mer relative to another KmerData32 by comparing the encoded
/// values exposed through <c>KmerData</c>.
/// </summary>
/// <param name="other">K-mer to compare with.</param>
/// <returns>
/// A negative number when this instance sorts before <paramref name="other"/>,
/// zero when both hold the same encoded value, and a positive number when
/// this instance sorts after it.
/// </returns>
public int CompareTo(KmerData32 other)
{
    int ordering = KmerData.CompareTo(other.KmerData);
    return ordering;
}
/// <summary>
/// Compares the encoded value of this k-mer with the encoded value of
/// another KmerData32.
/// </summary>
/// <param name="other">K-mer whose encoded value is compared against this one.</param>
/// <returns>
/// Less than zero when this instance's encoded value is smaller, zero when
/// the values are equal, greater than zero when it is larger.
/// </returns>
public int CompareTo(KmerData32 other)
{
    return KmerData.CompareTo(other.KmerData);
}
/// <summary>
/// Creates a node for the given k-mer with an initial occurrence count.
/// </summary>
/// <param name="value">K-mer stored in <c>NodeValue</c>.</param>
/// <param name="count">Initial value for <c>KmerCount</c>.</param>
public DeBruijnNode(KmerData32 value, byte count)
{
    NodeValue = value;
    KmerCount = count;
}
/// <summary>
/// Validates adding a right extension to a DeBruijnNode.
/// Reads the FASTA file path and k-mer length configured for
/// <paramref name="nodeName"/>, builds k-mers from the reads, creates two
/// nodes from the first k-mer of the first two reads' k-mer lists, links the
/// second as a right extension of the first and checks the node's
/// right-extension count.
/// </summary>
/// <param name="nodeName">XML node name used for different test cases.</param>
internal void ValidateDeBruijnNodeAddRightExtension(string nodeName)
{
    string fastaPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLengthText = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    using (FastAParser parser = new FastAParser(fastaPath))
    {
        // Load the reads and configure this instance with them.
        IEnumerable<ISequence> reads = parser.Parse();
        KmerLength = int.Parse(kmerLengthText, null);
        SequenceReads.Clear();
        SetSequenceReads(reads.ToList());

        // Build the k-mers for every read.
        SequenceToKmerBuilder kmerBuilder = new SequenceToKmerBuilder();
        IList<KmersOfSequence> kmersPerRead =
            new List<KmersOfSequence>(kmerBuilder.Build(SequenceReads, KmerLength));

        ISequence firstRead = SequenceReads.First();

        // Node under test.
        KmerData32 baseKmer = new KmerData32();
        baseKmer.SetKmerData(firstRead, kmersPerRead[0].Kmers.First().Positions[0], KmerLength);
        DeBruijnNode node = new DeBruijnNode(baseKmer, 1);

        // Node that is attached as the right extension.
        KmerData32 extensionKmer = new KmerData32();
        extensionKmer.SetKmerData(firstRead, kmersPerRead[1].Kmers.First().Positions[0], KmerLength);
        DeBruijnNode rightNode = new DeBruijnNode(extensionKmer, 1);

        node.SetExtensionNode(true, true, rightNode);

        Assert.AreEqual(kmersPerRead[1].Kmers.First().Count, node.RightExtensionNodesCount);
    }

    ApplicationLog.WriteLine(@"Padena BVT :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
}
/// <summary>
/// Looks for the node whose encoded k-mer equals that of
/// <paramref name="kmerValue"/> by walking down from the root: left when the
/// searched key is smaller than the current node's key, right otherwise.
/// </summary>
/// <param name="kmerValue">K-mer to search for.</param>
/// <returns>The matching node in the tree, or null when there is none.</returns>
public DeBruijnNode SearchTree(KmerData32 kmerValue)
{
    DeBruijnNode cursor = _root;
    while (cursor != null)
    {
        ulong cursorKey = cursor.NodeValue.KmerData;
        if (cursorKey == kmerValue.KmerData)
        {
            // Found the node holding the requested k-mer.
            return cursor;
        }

        if (kmerValue.KmerData < cursorKey)
        {
            cursor = cursor.Left;
        }
        else
        {
            cursor = cursor.Right;
        }
    }

    // Walked off the tree without finding the key.
    return cursor;
}
/// <summary>
/// Creates a DeBruijnNode for a k-mer with a count of zero and bumps this
/// tree's <c>Count</c>. Orientation is not considered.
/// </summary>
/// <param name="value">K-mer to make the node with.</param>
/// <returns>The newly created node (not yet attached to the tree).</returns>
private DeBruijnNode MakeNewNode(KmerData32 value)
{
    // Count is incremented before construction, matching the bookkeeping callers rely on.
    Count++;
    DeBruijnNode created = new DeBruijnNode(value, 0);
    return created;
}
/// <summary>
/// Validates the DeBruijnNode constructor: builds k-mers from the FASTA file
/// configured for <paramref name="nodeName"/>, creates two nodes from them,
/// attaches the second as a left extension of the first and checks the
/// node's left-extension count.
/// </summary>
/// <param name="nodeName">XML node name used for different test cases.</param>
internal void ValidateDeBruijnNodeCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads. The configured path uses backslashes; normalise it
    // to the platform's directory separator before opening.
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
    try
    {
        // Materialise the reads while the parser is still open.
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Always release the file, even when parsing throws.
        parser.Close();
    }

    // Build the kmers using assembler
    this.KmerLength = int.Parse(kmerLength, null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Validate the node creation
    // Create node and add left node.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

    DeBruijnNode node = new DeBruijnNode(kmerData, 1);
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    node.SetExtensionNode(false, true, leftnode);

    Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.LeftExtensionNodesCount);

    ApplicationLog.WriteLine(
        "Padena BVT : DeBruijnNode ctor() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validates the DeBruijnNode constructor and its count properties: builds
/// k-mers from the FASTA file configured for <paramref name="nodeName"/>,
/// creates a node, attaches one left and one right extension and compares
/// the node's extension and k-mer counts with the expected values from the
/// XML configuration.
/// </summary>
/// <param name="nodeName">XML node name used for different test cases.</param>
internal void ValidateDeBruijnNodeCtor(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);
    string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.NodeExtensionsCountNode);
    string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmersCountNode);
    string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.LeftNodeExtensionsCountNode);
    string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.RightNodeExtensionsCountNode);

    // Get the input reads and build kmers
    using (FastAParser parser = new FastAParser(filePath))
    {
        IEnumerable<ISequence> sequenceReads = parser.Parse();

        // Build the kmers using this
        this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
        this.SequenceReads.Clear();
        this.SetSequenceReads(sequenceReads.ToList());
        IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
            (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

        // Validate the node creation
        // Create node and add left and right nodes.
        ISequence seq = this.SequenceReads.First();
        KmerData32 kmerData = new KmerData32();
        kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

        DeBruijnNode node = new DeBruijnNode(kmerData, 1);
        kmerData = new KmerData32();
        kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

        // Both extension nodes are built from the same (second) k-mer.
        DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
        DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

        node.SetExtensionNode(false, true, leftnode);
        node.SetExtensionNode(true, true, rightnode);

        // Validate DeBruijnNode class properties (each property checked once).
        Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
        Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
        Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
        Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));
    }

    ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
}
/// <summary>
/// Validates removing extensions from a DeBruijnNode: builds k-mers from the
/// FASTA file configured for <paramref name="nodeName"/>, creates a node
/// with one left and one right extension, checks the counts, removes both
/// extensions via RemoveExtensionThreadSafe and checks that the counts drop
/// to zero.
/// </summary>
/// <param name="nodeName">XML node name used for different test cases.</param>
internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
{
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

    // Get the input reads and build kmers
    IEnumerable<ISequence> sequenceReads;
    FastAParser parser = new FastAParser();
    parser.Open(filePath);
    try
    {
        // Materialise the reads while the parser is still open.
        sequenceReads = parser.Parse().ToList();
    }
    finally
    {
        // Always release the file, even when parsing throws.
        parser.Close();
    }

    // Build kmers from step1
    this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
    this.SequenceReads.Clear();
    this.SetSequenceReads(sequenceReads.ToList());
    IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
        (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

    // Validate the node creation
    // Create node and add left and right nodes.
    ISequence seq = this.SequenceReads.First();
    KmerData32 kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

    DeBruijnNode node = new DeBruijnNode(kmerData, 1);
    kmerData = new KmerData32();
    kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

    // Both extension nodes are built from the same (second) k-mer.
    DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
    DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

    node.SetExtensionNode(false, true, leftnode);
    node.SetExtensionNode(true, true, rightnode);

    // Validates count before removing right and left extension nodes.
    Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.RightExtensionNodesCount);
    Assert.AreEqual(1, node.LeftExtensionNodesCount);

    // Remove right and left extension nodes.
    node.RemoveExtensionThreadSafe(rightnode);
    node.RemoveExtensionThreadSafe(leftnode);

    // Validate node after removing right and left extensions.
    Assert.AreEqual(0, node.RightExtensionNodesCount);
    Assert.AreEqual(0, node.LeftExtensionNodesCount);

    // The message names the method actually validated here (the previous
    // text was copied from the AddRightExtension test).
    ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode RemoveExtension() validation for Padena step2 completed successfully");
}
/// <summary>
/// Picks the bucket for a k-mer by masking its encoded value with
/// <c>_hashingMask</c>.
/// </summary>
/// <param name="value">K-mer whose bucket is wanted.</param>
/// <returns>Bucket index.</returns>
private int AssignBucket(KmerData32 value)
{
    ulong maskedBits = value.KmerData & _hashingMask;
    return (int)maskedBits;
}
/// <summary>
/// Slides a window of <paramref name="kmerLength"/> bases over the sequence
/// and returns one KmerData32 per window position, in sequence order.
/// Bases are packed two bits each into a ulong (using the DNA_*_VALUE
/// constants); only A, C, G and T in either case are accepted.
/// </summary>
/// <param name="sequence">Sequence to split into k-mers.</param>
/// <param name="kmerLength">Number of bases per k-mer.</param>
/// <returns>Array with <c>sequence.Count - kmerLength + 1</c> k-mers.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="kmerLength"/> is not positive, exceeds the sequence
/// length or MAX_KMER_LENGTH, or the sequence contains an unsupported symbol.
/// </exception>
public static KmerData32[] GetKmers(ISequence sequence, int kmerLength)
{
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    // A zero or negative length would produce a nonsensical mask and
    // out-of-place array writes below.
    if (kmerLength <= 0)
    {
        throw new ArgumentException("Invalid k-mer length - must be positive", "kmerLength");
    }

    long count = sequence.Count;
    if (kmerLength > count || kmerLength > MAX_KMER_LENGTH)
    {
        throw new ArgumentException("Invalid k-mer length - cannot exceed " + MAX_KMER_LENGTH, "kmerLength");
    }

    KmerData32[] kmers = new KmerData32[count - kmerLength + 1];

    // Mask that keeps only the low 2*kmerLength bits as bases are shifted in.
    // C# uses only the low six bits of a ulong shift count, so shifting by 64
    // is a no-op; a full-width k-mer therefore needs the all-ones mask
    // explicitly instead of ~(MaxValue << 64), which would evaluate to zero.
    int usedBits = kmerLength * 2;
    ulong mask = usedBits >= 64 ? ulong.MaxValue : ~(ulong.MaxValue << usedBits);

    ulong compressedKmer = 0;
    for (long i = 0; i < count; ++i)
    {
        ulong value;
        switch (sequence[i])
        {
            case 65: // 'A'
            case 97: // 'a'
                value = DNA_A_VALUE;
                break;
            case 67: // 'C'
            case 99: // 'c'
                value = DNA_C_VALUE;
                break;
            case 71: // 'G'
            case 103: // 'g'
                value = DNA_G_VALUE;
                break;
            case 84: // 'T'
            case 116: // 't'
                value = DNA_T_VALUE;
                break;
            default:
                throw new ArgumentException("Character not supported");
        }

        // Append the new base at the low end of the rolling encoding.
        compressedKmer = (compressedKmer << 2) + value;

        // Once a full window has been read, emit a k-mer for it.
        if (i >= (kmerLength - 1))
        {
            // Hide the bits of bases that slid out of the window.
            compressedKmer = compressedKmer & mask;

            // NOTE(review): the original comment says "get reverse compliment";
            // SetKmerData presumably chooses the stored orientation - confirm.
            KmerData32 nk = new KmerData32();
            nk.SetKmerData(compressedKmer, kmerLength);
            kmers[i - kmerLength + 1] = nk;
        }
    }

    return kmers;
}
/// <summary>
/// Looks up the node for a k-mer without inserting anything: selects the
/// k-mer's bucket and searches that bucket's tree.
/// </summary>
/// <param name="kmer">The k-mer to look up.</param>
/// <returns>The node found by the bucket's SearchTree, or null when the tree has no match.</returns>
public DeBruijnNode TryGetOld(KmerData32 kmer)
{
    BinaryTreeOfDebrujinNodes bucketTree = _buckets[AssignBucket(kmer)];
    DeBruijnNode match = bucketTree.SearchTree(kmer);
    return match;
}
/// <summary>
/// Produces every k-mer of the given length contained in the sequence, one
/// per starting position, in sequence order. Each base is packed into two
/// bits of a ulong (using the DNA_*_VALUE constants); only A, C, G and T in
/// either case are accepted.
/// </summary>
/// <param name="sequence">Sequence to split into k-mers.</param>
/// <param name="kmerLength">Number of bases per k-mer.</param>
/// <returns>Array with <c>sequence.Count - kmerLength + 1</c> k-mers.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="kmerLength"/> is not positive, exceeds the sequence
/// length or MAX_KMER_LENGTH, or the sequence contains an unsupported symbol.
/// </exception>
public static KmerData32[] GetKmers(ISequence sequence, int kmerLength)
{
    if (sequence == null)
    {
        throw new ArgumentNullException("sequence");
    }

    // Reject lengths that would yield a meaningless mask and misplaced writes.
    if (kmerLength <= 0)
    {
        throw new ArgumentException("Invalid k-mer length - must be positive", "kmerLength");
    }

    long count = sequence.Count;
    if (kmerLength > count || kmerLength > MAX_KMER_LENGTH)
    {
        throw new ArgumentException("Invalid k-mer length - cannot exceed " + MAX_KMER_LENGTH, "kmerLength");
    }

    KmerData32[] kmers = new KmerData32[count - kmerLength + 1];

    // Build a mask covering the low 2*kmerLength bits. A ulong shift count is
    // reduced to its low six bits in C#, so "<< 64" leaves the value unchanged
    // and the complemented mask would be zero; handle the full-width case
    // explicitly.
    int windowBits = kmerLength * 2;
    ulong mask = windowBits >= 64 ? ulong.MaxValue : ~(ulong.MaxValue << windowBits);

    ulong compressedKmer = 0;
    for (long i = 0; i < count; ++i)
    {
        ulong value;
        switch (sequence[i])
        {
            case 65: // 'A'
            case 97: // 'a'
                value = DNA_A_VALUE;
                break;
            case 67: // 'C'
            case 99: // 'c'
                value = DNA_C_VALUE;
                break;
            case 71: // 'G'
            case 103: // 'g'
                value = DNA_G_VALUE;
                break;
            case 84: // 'T'
            case 116: // 't'
                value = DNA_T_VALUE;
                break;
            default:
                throw new ArgumentException("Character not supported");
        }

        // Shift the rolling encoding by one base and add the new base.
        compressedKmer = (compressedKmer << 2) + value;

        // Skip positions before the first complete window.
        if (i < (kmerLength - 1))
        {
            continue;
        }

        // Drop the bits belonging to bases that left the window.
        compressedKmer = compressedKmer & mask;

        // NOTE(review): the original comment says "get reverse compliment";
        // SetKmerData presumably chooses the stored orientation - confirm.
        KmerData32 nk = new KmerData32();
        nk.SetKmerData(compressedKmer, kmerLength);
        kmers[i - kmerLength + 1] = nk;
    }

    return kmers;
}