Пример #1
        /// <summary>
        /// Returns the <see cref="DeBruijnNode"/> already stored for the given k-mer,
        /// or creates and stores a new one if none exists yet.
        /// Parallel note: the search-or-insert is performed while holding a lock on the
        /// bucket's tree, so concurrent additions to the same bucket are serialized.
        /// </summary>
        /// <param name="value">The k-mer to look up or insert.</param>
        /// <returns>The node representing this value.</returns>
        public DeBruijnNode SetNewOrGetOld(KmerData32 value)
        {
            BinaryTreeOfDebrujinNodes curBucket = _buckets[AssignBucket(value)];

            // Keep it thread safe for additions: one thread at a time per bucket.
            lock (curBucket)
            {
                return curBucket.AddOrReturnCurrent(value);
            }
        }
Пример #2
 /// <summary>
 /// Adds a node for the specified k-mer to the binary tree, unless a node with the same
 /// encoded value is already present, in which case the existing node is returned.
 /// Useful when two values that are equal by comparison are not equal by reference.
 /// </summary>
 /// <param name="value">K-mer to add.</param>
 /// <returns>The newly created node, or the node already in the tree for this k-mer.</returns>
 public DeBruijnNode AddOrReturnCurrent(KmerData32 value)
 {
     // Empty tree: the new node becomes the root.
     if (_root == null)
     {
         _root = MakeNewNode(value);
         return _root;
     }

     ulong newKey = value.KmerData;
     DeBruijnNode node = _root;
     while (true)
     {
         ulong currentKey = node.NodeValue.KmerData;
         if (currentKey == newKey)
         {
             // Key already exists.
             return node;
         }

         if (newKey < currentKey)
         {
             // Go to the left; insert here if the slot is free.
             if (node.Left == null)
             {
                 DeBruijnNode created = MakeNewNode(value);
                 node.Left = created;
                 return created;
             }

             node = node.Left;
         }
         else
         {
             // Go to the right; insert here if the slot is free.
             if (node.Right == null)
             {
                 DeBruijnNode created = MakeNewNode(value);
                 node.Right = created;
                 return created;
             }

             node = node.Right;
         }
     }
 }
Пример #3
        /// <summary>
        /// Adds the links between the nodes of the graph.
        /// For every node, each of the four possible one-nucleotide extensions on the right
        /// and on the left is looked up in <paramref name="kmerManager"/>; every k-mer that
        /// is found is registered on the node as an extension.
        /// </summary>
        /// <param name="kmerManager">Dictionary used to look up the node of a neighbouring k-mer.</param>
        private void GenerateLinks(KmerDictionary kmerManager)
        {
            // Number of bits between the lowest bit pair and the bit pair holding the
            // first (left-most) nucleotide of the encoded k-mer.
            int distancetoShift = 2 * (KmerLength - 1);

            // Mask that clears the two bits representing the first nucleotide.
            ulong rightMask = ~(((ulong)3) << distancetoShift);

            Parallel.ForEach(_nodes, node =>
            {
                // Scratch k-mer, re-set before each of the eight lookups for this node.
                KmerData32 searchNodeValue = new KmerData32();

                // Right extensions: remove the left-most nucleotide with the mask, then
                // shift left by two bits to make room for the new right-most nucleotide.
                ulong nextKmer = (node.NodeValue.KmerData & rightMask) << 2;
                for (ulong i = 0; i < 4; i++)
                {
                    // Equivalent to "ACGTA" + "N" where N is the 0-3 encoding for A,C,G,T.
                    ulong tmpNextKmer = nextKmer | i;

                    // SetKmerData's result is forwarded as the "match is reverse complement"
                    // flag, i.e. whether the reverse complement was used instead of the k-mer itself.
                    bool matchIsRC = searchNodeValue.SetKmerData(tmpNextKmer, KmerLength);
                    DeBruijnNode searchResult = kmerManager.TryGetOld(searchNodeValue);
                    if (searchResult != null)
                    {
                        node.SetExtensionNode(true, matchIsRC, searchResult);
                    }
                }

                // Left extensions: chop off the right-most nucleotide.
                nextKmer = node.NodeValue.KmerData >> 2;
                for (ulong i = 0; i < 4; i++)
                {
                    // Equivalent to "N" + "ACGAT": the new nucleotide becomes the top bit pair.
                    ulong tmpNextKmer = (i << distancetoShift) | nextKmer;
                    bool matchIsRC = searchNodeValue.SetKmerData(tmpNextKmer, KmerLength);
                    DeBruijnNode searchResult = kmerManager.TryGetOld(searchNodeValue);
                    if (searchResult != null)
                    {
                        node.SetExtensionNode(false, matchIsRC, searchResult);
                    }
                }
            });

            LinkGenerationCompleted = true;
        }
Пример #4
        /// <summary>
        /// Compares this instance to a specified object and returns an indication of their relative values.
        /// </summary>
        /// <param name="obj">Object to compare with; it is cast to KmerData32.</param>
        /// <returns>
        ///  A signed number indicating the relative values of the two instances.
        ///  Zero: this instance is equal to <paramref name="obj"/>.
        ///  Greater than zero: this instance is greater than <paramref name="obj"/>.
        /// </returns>
        public int CompareTo(object obj)
            // Direct cast: an object of any other type raises InvalidCastException.
            // NOTE(review): the comparison and return statement are not visible in this excerpt.
            KmerData32 kmer = (KmerData32)obj;

Пример #5
 /// <summary>
 /// Compares this instance to a specified instance of KmerData32 and returns an indication of their relative values.
 /// </summary>
 /// <param name="kmer">Instance of KmerData32 to compare with.</param>
 /// <returns>
 /// A signed number indicating the relative values of the two instances.
 /// Zero: this instance is equal to <paramref name="kmer"/>.
 /// Greater than zero: this instance is greater than <paramref name="kmer"/>.
 /// </returns>
 /// <remarks>NOTE(review): the method body is not visible in this excerpt.</remarks>
 public int CompareTo(KmerData32 kmer)
Пример #6
        /// <summary>
        /// Compares this instance to a specified object and returns an indication of their relative values.
        /// </summary>
        /// <param name="value">Object to compare with; it is cast to KmerData32.</param>
        /// <returns>
        ///  A signed number indicating the relative values of the two instances.
        ///  Zero: this instance is equal to <paramref name="value"/>.
        ///  Greater than zero: this instance is greater than <paramref name="value"/>.
        /// </returns>
        public int CompareTo(object value)
            // Direct cast: an object of any other type raises InvalidCastException.
            // NOTE(review): the comparison and return statement are not visible in this excerpt.
            KmerData32 kmer = (KmerData32)value;

Пример #7
 /// <summary>
 /// Compares this instance to a specified instance of KmerData32 and returns an indication of their relative values.
 /// </summary>
 /// <param name="other">Instance of KmerData32 to compare with.</param>
 /// <returns>
 /// A signed number indicating the relative values of the two instances.
 /// Zero: this instance is equal to <paramref name="other"/>.
 /// Greater than zero: this instance is greater than <paramref name="other"/>.
 /// </returns>
 /// <remarks>NOTE(review): the method body is not visible in this excerpt.</remarks>
 public int CompareTo(KmerData32 other)
Пример #8
 /// <summary>
 /// Compares this instance to another KmerData32 by comparing their encoded
 /// <c>KmerData</c> values.
 /// </summary>
 /// <param name="other">Instance of KmerData32 to compare with.</param>
 /// <returns>
 /// A signed number indicating the relative values of the two instances.
 /// Less than zero: this instance's KmerData is smaller than that of <paramref name="other"/>.
 /// Zero: both KmerData values are equal.
 /// Greater than zero: this instance's KmerData is larger than that of <paramref name="other"/>.
 /// </returns>
 public int CompareTo(KmerData32 other)
 {
     return this.KmerData.CompareTo(other.KmerData);
 }
Пример #9
 /// <summary>
 /// Initializes a new instance of the DeBruijnNode class.
 /// </summary>
 /// <param name="value">K-mer stored as the node's <c>NodeValue</c>.</param>
 /// <param name="count">Initial value of the node's <c>KmerCount</c>.</param>
 public DeBruijnNode(KmerData32 value, byte count)
 {
     this.NodeValue = value;
     this.KmerCount = count;
 }
Пример #10
        /// <summary>
        /// Validates adding a right extension to a DeBruijnNode: builds two nodes from the
        /// first k-mers of the first read, links the second as a right extension of the
        /// first via SetExtensionNode, and checks RightExtensionNodesCount.
        /// </summary>
        /// <param name="nodeName">Name of the XML node from which the file path and k-mer length are read.</param>
        internal void ValidateDeBruijnNodeAddRightExtension(string nodeName)
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            // Get the input reads and build kmers
            using (FastAParser parser = new FastAParser(filePath))
                // NOTE(review): this local is not referenced again in the visible code; the
                // k-mers below are built from this.SequenceReads. Confirm how the parsed
                // reads are meant to reach that property.
                IEnumerable<ISequence> sequenceReads = parser.Parse();

                // Build kmers from step1
                this.KmerLength = int.Parse(kmerLength, null);

                IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>((new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

                // Validate the node creation
                // Create a node from the first k-mer of the first read.
                ISequence seq = this.SequenceReads.First();
                KmerData32 kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode node = new DeBruijnNode(kmerData, 1);
                // Second node, built from the first k-mer of the second KmersOfSequence entry.
                kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

                // Attach it as a right extension (first argument true) and verify the count.
                DeBruijnNode rightNode = new DeBruijnNode(kmerData, 1);
                node.SetExtensionNode(true, true, rightNode);
                Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.RightExtensionNodesCount);

            ApplicationLog.WriteLine(@"Padena BVT :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
Пример #11
        /// <summary>
        /// Searches the tree for the node holding the given k-mer.
        /// </summary>
        /// <param name="kmerValue">The k-mer to search for.</param>
        /// <returns>The matching node in the tree, or null if the k-mer is not present.</returns>
        public DeBruijnNode SearchTree(KmerData32 kmerValue)
        {
            DeBruijnNode current = _root;
            while (current != null)
            {
                ulong currentValue = current.NodeValue.KmerData;

                // Parameter value found.
                if (currentValue == kmerValue.KmerData)
                {
                    break;
                }

                // Smaller keys live in the left subtree, larger keys in the right one.
                current = kmerValue.KmerData < currentValue ? current.Left : current.Right;
            }

            // Null here means the walk fell off the tree without finding the key.
            return current;
        }
Пример #12
 /// <summary>
 /// Makes a new DeBruijnNode for a k-mer, ignoring orientation.
 /// </summary>
 /// <param name="value">K-mer to make the node with.</param>
 /// <returns>A new node holding <paramref name="value"/> with a k-mer count of 0.</returns>
 private DeBruijnNode MakeNewNode(KmerData32 value)
 {
     return new DeBruijnNode(value, 0);
 }
Пример #13
        /// <summary>
        /// Validates the DeBruijnNode constructor: builds two nodes from k-mers of the first
        /// read, links the second as a left extension of the first, and checks
        /// LeftExtensionNodesCount.
        /// </summary>
        /// <param name="nodeName">Name of the XML node from which the file path and k-mer length are read.</param>
        internal void ValidateDeBruijnNodeCtor(string nodeName)
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.KmerLengthNode);

            // Get the input reads and build kmers.
            // Backslashes in the configured path are replaced with the platform's directory separator.
            FastAParser parser = new FastAParser();
            parser.Open( filePath.Replace("\\", System.IO.Path.DirectorySeparatorChar.ToString()));
                // Materialize the reads before the parser is closed.
                // NOTE(review): this local is not referenced again in the visible code; the
                // k-mers below are built from this.SequenceReads — confirm.
                IEnumerable<ISequence> sequenceReads = parser.Parse().ToList();
            parser.Close ();

                // Build the kmers using assembler
                this.KmerLength = int.Parse(kmerLength, null);
                IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>((new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

                // Validate the node creation
                // Create a node from the first k-mer of the first read.
                ISequence seq = this.SequenceReads.First();
                KmerData32 kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode node = new DeBruijnNode(kmerData, 1);
                // Second node, built from the first k-mer of the second KmersOfSequence entry.
                kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

                // Attach it as a left extension (first argument false).
                DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
                node.SetExtensionNode(false, true, leftnode);

                Assert.AreEqual(lstKmers[1].Kmers.First().Count, node.LeftExtensionNodesCount);

                // NOTE(review): the call this string argument belongs to is not visible in this excerpt.
                "Padena BVT : DeBruijnNode ctor() validation for Padena step2 completed successfully");
Пример #14
        /// <summary>
        /// Validates the DeBruijnNode constructor and its count properties: builds a node,
        /// attaches one left and one right extension, and compares ExtensionsCount, KmerCount,
        /// LeftExtensionNodesCount and RightExtensionNodesCount with the expected strings
        /// read from the test configuration.
        /// </summary>
        /// <param name="nodeName">Name of the XML node from which the test inputs and expected values are read.</param>
        internal void ValidateDeBruijnNodeCtor(string nodeName)
            // NOTE(review): the second argument of each GetTextValue call below is not
            // visible in this excerpt.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,
            string nodeExtensionsCount = utilityObj.xmlUtil.GetTextValue(nodeName,
            string kmersCount = utilityObj.xmlUtil.GetTextValue(nodeName,
            string leftNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,
            string rightNodeExtensionCount = utilityObj.xmlUtil.GetTextValue(nodeName,

            // Get the input reads and build kmers
            IEnumerable<ISequence> sequenceReads = null;
            using (FastAParser parser = new FastAParser(filePath))
                // NOTE(review): sequenceReads is not referenced again in the visible code; the
                // k-mers below are built from this.SequenceReads — confirm.
                sequenceReads = parser.Parse();

                // Build the kmers using this
                this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);
                IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                    (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

                // Validate the node creation
                // Create a node from the first k-mer of the first read.
                ISequence seq = this.SequenceReads.First();
                KmerData32 kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode node = new DeBruijnNode(kmerData, 1);
                // Second k-mer, shared by both extension nodes created below.
                kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
                DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

                // First argument: false attaches a left extension, true a right extension.
                node.SetExtensionNode(false, true, leftnode);
                node.SetExtensionNode(true, true, rightnode);

                // Validate DeBruijnNode class properties.
                Assert.AreEqual(nodeExtensionsCount, node.ExtensionsCount.ToString((IFormatProvider)null));
                Assert.AreEqual(kmersCount, node.KmerCount.ToString((IFormatProvider)null));
                Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
                Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));
                // NOTE(review): the next two assertions repeat the two directly above.
                Assert.AreEqual(leftNodeExtensionCount, node.LeftExtensionNodesCount.ToString((IFormatProvider)null));
                Assert.AreEqual(rightNodeExtensionCount, node.RightExtensionNodesCount.ToString((IFormatProvider)null));

            ApplicationLog.WriteLine("Padena P1 : DeBruijnNode ctor() validation for Padena step2 completed successfully");
Пример #15
        /// <summary>
        /// Validates removing extensions from a DeBruijnNode: attaches one left and one right
        /// extension, checks both counts are 1, and then expects both counts to be 0 after
        /// the extensions have been removed.
        /// </summary>
        /// <param name="nodeName">Name of the XML node from which the test inputs are read.</param>
        internal void ValidateDeBruijnNodeRemoveExtension(string nodeName)
            // NOTE(review): the second argument of each GetTextValue call below is not
            // visible in this excerpt.
            string filePath = utilityObj.xmlUtil.GetTextValue(nodeName,
            string kmerLength = utilityObj.xmlUtil.GetTextValue(nodeName,

            // Get the input reads and build kmers
            IEnumerable<ISequence> sequenceReads = null;
            // NOTE(review): no call that opens filePath on the parser is visible here — confirm.
            FastAParser parser = new FastAParser();
                // Materialize the reads before the parser is closed.
                sequenceReads = parser.Parse().ToList();
            parser.Close ();

                // Build kmers from step1
                this.KmerLength = int.Parse(kmerLength, (IFormatProvider)null);

                IList<KmersOfSequence> lstKmers = new List<KmersOfSequence>(
                    (new SequenceToKmerBuilder()).Build(this.SequenceReads, this.KmerLength));

                // Validate the node creation
                // Create a node from the first k-mer of the first read.
                ISequence seq = this.SequenceReads.First();
                KmerData32 kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[0].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode node = new DeBruijnNode(kmerData, 1);
                // Second k-mer, shared by both extension nodes created below.
                kmerData = new KmerData32();
                kmerData.SetKmerData(seq, lstKmers[1].Kmers.First().Positions[0], this.KmerLength);

                DeBruijnNode leftnode = new DeBruijnNode(kmerData, 1);
                DeBruijnNode rightnode = new DeBruijnNode(kmerData, 1);

                // First argument: false attaches a left extension, true a right extension.
                node.SetExtensionNode(false, true, leftnode);
                node.SetExtensionNode(true, true, rightnode);

                // Validates count before removing right and left extension nodes.
                Assert.AreEqual(1, node.RightExtensionNodesCount);
                Assert.AreEqual(1, node.LeftExtensionNodesCount);

                // Remove right and left extension nodes.
                // NOTE(review): the removal calls are not visible in this excerpt.

                // Validate node after removing right and left extensions.
                Assert.AreEqual(0, node.RightExtensionNodesCount);
                Assert.AreEqual(0, node.LeftExtensionNodesCount);

            // NOTE(review): the message below mentions AddRightExtension() although this
            // method validates extension removal.
            ApplicationLog.WriteLine(@"Padena P1 :DeBruijnNode AddRightExtension() validation for Padena step2 completed successfully");
Пример #16
 /// <summary>
 /// Assigns a k-mer to a bucket by masking its ulong-encoded value with the hashing mask.
 /// </summary>
 /// <param name="value">K-mer whose encoded value selects the bucket.</param>
 /// <returns>The bucket index.</returns>
 private int AssignBucket(KmerData32 value)
 {
     ulong maskedBits = value.KmerData & _hashingMask;
     return (int)maskedBits;
 }
Пример #17
        /// <summary>
        /// Iterates through a sequence producing all possible k-mers in it, one per start position.
        /// </summary>
        /// <param name="sequence">Sequence to read symbols from; must not be null.</param>
        /// <param name="kmerLength">Length of each k-mer; may exceed neither the sequence length nor MAX_KMER_LENGTH.</param>
        /// <returns>NOTE(review): no return statement is visible in this excerpt; presumably the filled <c>kmers</c> array — confirm.</returns>
        /// <exception cref="ArgumentNullException">sequence is null.</exception>
        /// <exception cref="ArgumentException">kmerLength is too large, or the sequence contains a symbol other than A, C, G, T (either case).</exception>
        public static      KmerData32[] GetKmers(ISequence sequence, int kmerLength)
            if (sequence == null)
                throw new ArgumentNullException("sequence");

            long count = sequence.Count;

            if (kmerLength > count || kmerLength > MAX_KMER_LENGTH)
                throw new ArgumentException("Invalid k-mer length - cannot exceed " + MAX_KMER_LENGTH, "kmerLength");

            // One slot per k-mer start position.
            KmerData32[] kmers = new KmerData32[count - kmerLength + 1];

            // First make a mask to hide the higher bits as symbols are shifted in.
            // NOTE(review): C# takes the shift count of a ulong modulo 64, so a kmerLength
            // of 32 would leave the mask at 0 — confirm MAX_KMER_LENGTH is below 32.
            ulong mask = ulong.MaxValue; // all bits set

            mask <<= (kmerLength * 2);   // zero the low 2*kmerLength bits (the region to keep)
            mask   = ~mask;              // then flip the bits to get the mask

            // Rolling 2-bits-per-symbol encoding of the most recent symbols.
            ulong compressedKmer = 0;

            for (long i = 0; i < count; ++i)
                ulong value;
                // NOTE(review): the break statements and default label of this switch are
                // not visible in this excerpt.
                switch (sequence[i])
                case 65:     // 'A'
                case 97:     // 'a'
                    value = DNA_A_VALUE;

                case 67:     // 'C'
                case 99:     // 'c'
                    value = DNA_C_VALUE;

                case 71:     // 'G'
                case 103:    // 'g'
                    value = DNA_G_VALUE;

                case 84:     // 'T'
                case 116:    // 't'
                    value = DNA_T_VALUE;

                    throw new ArgumentException("Character not supported");
                // Shift the new symbol into the low two bits.
                compressedKmer = (compressedKmer << 2) + value;
                // Emit a k-mer once at least kmerLength symbols have been read.
                if (i >= (kmerLength - 1))
                    // hide top bits
                    compressedKmer = compressedKmer & mask;
                    // store the k-mer (SetKmerData presumably handles the reverse complement — confirm)
                    KmerData32 nk = new KmerData32();
                    nk.SetKmerData(compressedKmer, kmerLength);
                    kmers[i - kmerLength + 1] = nk;
Пример #18
 /// <summary>
 /// Returns the node stored for a given k-mer, without adding one.
 /// </summary>
 /// <param name="kmer">The k-mer to look up.</param>
 /// <returns>The result of searching the k-mer's bucket tree: the node for the k-mer, or null if the search finds none.</returns>
 public DeBruijnNode TryGetOld(KmerData32 kmer)
 {
     BinaryTreeOfDebrujinNodes tree = _buckets[AssignBucket(kmer)];
     return tree.SearchTree(kmer);
 }
Пример #19
        /// <summary>
        /// Iterates through a sequence producing all possible k-mers in it, one per start position.
        /// </summary>
        /// <param name="sequence">Sequence to read symbols from; must not be null.</param>
        /// <param name="kmerLength">
        /// Length of each k-mer; must be at least 1 and may exceed neither the sequence
        /// length nor MAX_KMER_LENGTH.
        /// </param>
        /// <returns>An array with one KmerData32 per k-mer start position, in sequence order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="kmerLength"/> is out of range, or the sequence contains a symbol
        /// other than A, C, G, T (either case).
        /// </exception>
        public static KmerData32[] GetKmers(ISequence sequence, int kmerLength)
        {
            if (sequence == null)
            {
                throw new ArgumentNullException("sequence");
            }

            // A non-positive length would produce a zero mask and out-of-step array indices.
            if (kmerLength < 1)
            {
                throw new ArgumentException("Invalid k-mer length - must be at least 1", "kmerLength");
            }

            long count = sequence.Count;
            if (kmerLength > count || kmerLength > MAX_KMER_LENGTH)
            {
                throw new ArgumentException("Invalid k-mer length - cannot exceed " + MAX_KMER_LENGTH, "kmerLength");
            }

            KmerData32[] kmers = new KmerData32[count - kmerLength + 1];

            // Mask keeping the low 2*kmerLength bits. C# takes the shift count of a ulong
            // modulo 64, so a 32-mer cannot be handled by shifting and needs the explicit
            // all-ones case (only relevant if MAX_KMER_LENGTH permits 32).
            ulong mask = kmerLength >= 32
                ? ulong.MaxValue
                : (1UL << (kmerLength * 2)) - 1;

            // Rolling 2-bits-per-symbol encoding of the most recent symbols.
            ulong compressedKmer = 0;
            for (long i = 0; i < count; ++i)
            {
                ulong value;
                switch (sequence[i])
                {
                    case 65: // 'A'
                    case 97: // 'a'
                        value = DNA_A_VALUE;
                        break;
                    case 67: // 'C'
                    case 99: // 'c'
                        value = DNA_C_VALUE;
                        break;
                    case 71: // 'G'
                    case 103: // 'g'
                        value = DNA_G_VALUE;
                        break;
                    case 84: // 'T'
                    case 116: // 't'
                        value = DNA_T_VALUE;
                        break;
                    default:
                        throw new ArgumentException("Character not supported");
                }

                // Shift the new symbol into the low two bits.
                compressedKmer = (compressedKmer << 2) + value;

                // Emit a k-mer once at least kmerLength symbols have been read.
                if (i >= (kmerLength - 1))
                {
                    // Hide the bits of symbols that have scrolled out of the window.
                    compressedKmer = compressedKmer & mask;

                    KmerData32 nk = new KmerData32();
                    nk.SetKmerData(compressedKmer, kmerLength);
                    kmers[i - kmerLength + 1] = nk;
                }
            }

            return kmers;
        }