Esempio n. 1
0
        // rank() variant that does not use the RRR data structure. It returns the number of occurrences
        // of the given character in the input nucleotide sequence.
        /// <summary>
        /// Computes the rank of <paramref name="character"/> by descending the wavelet tree from
        /// <paramref name="currentNode"/>, using the plain (uncompressed) node bitmaps.
        /// </summary>
        /// <param name="currentNode">Node at which this step of the descent starts.</param>
        /// <param name="index">
        /// Position inside <paramref name="currentNode"/>'s bitmap up to which occurrences are counted
        /// (presumably inclusive; this depends on <c>popcount(bitmap, index)</c> - confirm).
        /// </param>
        /// <param name="character">Character whose occurrences are counted.</param>
        /// <param name="currentAlphabet">Alphabet covered by <paramref name="currentNode"/>.</param>
        /// <returns>
        /// The number of occurrences found, or 0 when <paramref name="character"/> is not part of
        /// <paramref name="currentAlphabet"/> or <paramref name="index"/> is negative.
        /// </returns>
        public int rank(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
        {
            // A negative index is what the recursion passes down when the prefix contains no symbol
            // of this subtree, so the answer is 0 without touching the bitmap.
            if (index < 0 || !currentAlphabet.Contains(character))
            {
                return 0;
            }

            int mid = (currentAlphabet.Count + 1) / 2;
            int onesInPrefix = popcount(currentNode.getBitmap(), index);

            int         newIndex;
            WaveletNode childNode;
            ArrayList   childAlphabet;

            if (getIndex(character, currentAlphabet) < mid)
            {
                // Character lives in the left half: map the position to the count of 0-bits.
                newIndex  = index - onesInPrefix;
                childNode = currentNode.getLeftChild();
                // Must be the same split buildWaveletTree() uses for the left child, i.e. the first
                // `mid` symbols. The previous length (Count - (mid - 1)) was one symbol too long for
                // even-sized alphabets and made the child interpret its bits with the wrong alphabet.
                childAlphabet = currentAlphabet.GetRange(0, mid);
            }
            else
            {
                // Character lives in the right half: map the position to the count of 1-bits.
                newIndex      = onesInPrefix - 1;
                childNode     = currentNode.getRightChild();
                childAlphabet = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
            }

            if (childNode == null)
            {
                // No deeper node: newIndex is a zero-based position, so the count is newIndex + 1.
                return newIndex + 1;
            }

            return rank(childNode, newIndex, character, childAlphabet);
        }
Esempio n. 2
0
        // select() returns the index of the n-th occurrence of the given character in the input nucleotide string.
        // It does not use the RRR data structure; it works on the regular bitmap.
        /// <summary>
        /// Locates the n-th occurrence of <paramref name="character"/> using the plain node bitmaps:
        /// first walks down from the root to the node that encodes the character, then walks back up
        /// through the parents translating the position at every level.
        /// </summary>
        /// <param name="nthOccurrence">Which occurrence to look for.</param>
        /// <param name="character">Character to search for.</param>
        /// <returns>
        /// The value computed at the root minus one, or -1 when the lookup in the lowest node yields 0
        /// (presumably meaning there are fewer than <paramref name="nthOccurrence"/> occurrences).
        /// </returns>
        public int select(int nthOccurrence, char character)
        {
            Interval    range         = new Interval(0, alphabet.Count - 1);
            WaveletNode node          = rootNode;
            int         targetIndex   = getIndex(character, alphabet);
            bool        encodedAsZero = true;

            // Top-down: narrow the alphabet interval until the node that stores the character is reached.
            while (range.isGreaterThanTwo())
            {
                // For a three-symbol interval the right-most symbol is handled at this node and is
                // read as a 1-bit, so the descent stops here.
                if (range.getSize() == 3 && range.getRightIndex() == targetIndex)
                {
                    encodedAsZero = false;
                    break;
                }

                if (targetIndex > range.getMiddleIndex())
                {
                    node = node.getRightChild();
                    range.setLeftIndex();
                }
                else
                {
                    node = node.getLeftChild();
                    range.setRightIndex();
                }
            }

            // Unless the loop already decided on a 1-bit, the character is a 0-bit exactly when it is
            // the left end of the remaining interval.
            if (encodedAsZero)
            {
                encodedAsZero = range.getLeftIndex() == targetIndex;
            }

            int position = getPositionOfNthOccurrence(node.getBitmap(), nthOccurrence, encodedAsZero);

            if (position == 0)
            {
                return -1;
            }

            // Bottom-up: at each ancestor, look for the position-th 0 (came from the left child)
            // or the position-th 1 (came from the right child).
            WaveletNode child = node;

            for (WaveletNode parent = node.getParent(); parent != null; parent = parent.getParent())
            {
                bool cameFromLeft = parent.getLeftChild().Equals(child);

                position = getPositionOfNthOccurrence(parent.getBitmap(), position, cameFromLeft);
                child    = parent;
            }

            return position - 1;
        }
Esempio n. 3
0
        // selectRRR() returns the index of the n-th occurrence of the given character in the input nucleotide string.
        /// <summary>
        /// Locates the n-th occurrence of <paramref name="character"/>. Works like a two-phase walk:
        /// descend from the root to the node encoding the character, then climb back to the root,
        /// resolving the position at each level through <c>selectOnBitmap</c>.
        /// </summary>
        /// <param name="nthOccurrence">Which occurrence to look for.</param>
        /// <param name="character">Character to search for.</param>
        /// <returns>
        /// The value computed at the root minus one, or -1 when the character does not occur
        /// <paramref name="nthOccurrence"/> times.
        /// </returns>
        public int selectRRR(int nthOccurrence, char character)
        {
            Interval    symbols     = new Interval(0, alphabet.Count - 1);
            WaveletNode cursor      = rootNode;
            int         symbolIndex = getIndex(character, alphabet);
            bool        isZeroBit   = true;

            // Phase 1: descend until the interval holds at most two symbols, or until the character
            // turns out to be the right-most symbol of a three-symbol interval.
            while (symbols.isGreaterThanTwo())
            {
                bool isRightmostOfThree = symbols.getSize() == 3 && symbols.getRightIndex() == symbolIndex;

                if (isRightmostOfThree)
                {
                    isZeroBit = false;
                    break;
                }

                bool goLeft = symbolIndex <= symbols.getMiddleIndex();

                if (goLeft)
                {
                    cursor = cursor.getLeftChild();
                    symbols.setRightIndex();
                }
                else
                {
                    cursor = cursor.getRightChild();
                    symbols.setLeftIndex();
                }
            }

            // If the loop did not already settle on a 1-bit, the bit is 0 only for the left-most symbol.
            if (isZeroBit && symbols.getLeftIndex() != symbolIndex)
            {
                isZeroBit = false;
            }

            // Phase 2: bottom-up traversal starting at the node that represents the character.
            int position = selectOnBitmap(cursor, nthOccurrence, isZeroBit);

            if (position == 0)
            {
                return -1; // the character does not occur n times
            }

            WaveletNode visited  = cursor;
            WaveletNode ancestor = cursor.getParent();

            while (ancestor != null)
            {
                // Coming from the left child means the symbol is a 0-bit in the ancestor, otherwise a 1-bit.
                bool fromLeft = ancestor.getLeftChild().Equals(visited);

                position = selectOnBitmap(ancestor, position, fromLeft);

                visited  = ancestor;
                ancestor = ancestor.getParent();
            }

            return position - 1;
        }
Esempio n. 4
0
        // rank() variant that uses the RRR data structure. It returns the number of occurrences
        // of the given character in the input nucleotide sequence.
        /// <summary>
        /// Computes the rank of <paramref name="character"/> by descending the wavelet tree from
        /// <paramref name="currentNode"/>, counting 1-bits through each node's RRR structure
        /// (superblock sums, block classes and the lookup table G) instead of the plain bitmap.
        /// </summary>
        /// <param name="currentNode">Node at which this step of the descent starts.</param>
        /// <param name="index">
        /// Position inside <paramref name="currentNode"/>'s bitmap up to which occurrences are counted
        /// (presumably inclusive; this depends on <c>popcount(block, index)</c> - confirm).
        /// </param>
        /// <param name="character">Character whose occurrences are counted.</param>
        /// <param name="currentAlphabet">Alphabet covered by <paramref name="currentNode"/>.</param>
        /// <returns>
        /// The number of occurrences found, or 0 when <paramref name="character"/> is not part of
        /// <paramref name="currentAlphabet"/> or <paramref name="index"/> is negative.
        /// </returns>
        public int rankRRR(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
        {
            // A negative index is what the recursion passes down when the prefix contains no symbol
            // of this subtree. Answering 0 here also keeps the block/superblock arithmetic below from
            // producing negative indices.
            if (index < 0 || !currentAlphabet.Contains(character))
            {
                return 0;
            }

            int    blockSize           = currentNode.RRRTable.BlockSize;
            int    classBits           = currentNode.RRRTable.ClassBitsNeeded;
            int    blocksPerSuperblock = currentNode.RRRTable.SuperblockSize / blockSize;
            string rrrBitmap           = currentNode.RRRStruct.Bitmap;

            int blockIndex      = index / blockSize;
            int superBlockIndex = blockIndex / blocksPerSuperblock;

            // Start from the totals stored for the preceding superblock, if there is one.
            int shift   = 0;
            int bitsSum = 0;
            if (superBlockIndex > 0)
            {
                shift   = currentNode.RRRStruct.superblockOffsets[superBlockIndex - 1];
                bitsSum = currentNode.RRRStruct.superblockSums[superBlockIndex - 1];
            }

            // Skip the whole blocks of the current superblock that lie before the target block;
            // each contributes its class (its number of 1-bits) to the sum.
            int blocksRemaining = blockIndex - blocksPerSuperblock * superBlockIndex;
            for (int i = 0; i < blocksRemaining; i++)
            {
                int blockClass = Convert.ToInt32(rrrBitmap.Substring(shift, classBits), 2);
                bitsSum += blockClass;
                shift   += classBits + rrrOffsetBits(blockSize, blockClass);
            }

            // Decode the target block through table G and count the 1-bits inside it.
            int lastClass  = Convert.ToInt32(rrrBitmap.Substring(shift, classBits), 2);
            int lastOffset = Convert.ToInt32(rrrBitmap.Substring(shift + classBits, rrrOffsetBits(blockSize, lastClass)), 2);
            bitsSum += popcount(currentNode.RRRTable.TableG[lastClass][lastOffset], index % blockSize);

            int         newIndex;
            WaveletNode childNode;
            ArrayList   childAlphabet;

            if (getIndex(character, currentAlphabet) < mid(currentAlphabet))
            {
                // Character lives in the left half: map the position to the count of 0-bits.
                newIndex  = index - bitsSum;
                childNode = currentNode.getLeftChild();
                // Must be the same split buildWaveletTree() uses for the left child, i.e. the first
                // `mid` symbols. The previous length (Count - (mid - 1)) was one symbol too long for
                // even-sized alphabets and made the child interpret its bits with the wrong alphabet.
                childAlphabet = currentAlphabet.GetRange(0, mid(currentAlphabet));
            }
            else
            {
                // Character lives in the right half: map the position to the count of 1-bits.
                newIndex      = bitsSum - 1;
                childNode     = currentNode.getRightChild();
                childAlphabet = currentAlphabet.GetRange(mid(currentAlphabet), currentAlphabet.Count - mid(currentAlphabet));
            }

            if (childNode == null)
            {
                // No deeper node: newIndex is a zero-based position, so the count is newIndex + 1.
                return newIndex + 1;
            }

            return rankRRR(childNode, newIndex, character, childAlphabet);
        }

        // Number of symbols that belong to the left half of an alphabet (the split point used by the tree).
        private static int mid(ArrayList currentAlphabet)
        {
            return (currentAlphabet.Count + 1) / 2;
        }

        // Width in bits of the offset field that follows a class field in the RRR bitmap.
        // A class with a single possible block still occupies one bit, hence the minimum of 1.
        private int rrrOffsetBits(int blockSize, int blockClass)
        {
            int bits = (int)Math.Ceiling(Math.Log(GetBinCoeff(blockSize, blockClass), 2));
            return bits == 0 ? 1 : bits;
        }
Esempio n. 5
0
        // buildWaveletTree() constructs the wavelet tree. It creates the RRR lookup table and
        // attaches an RRR data structure to every node.
        /// <summary>
        /// Recursively builds the wavelet (sub)tree rooted at <paramref name="currentNode"/>.
        /// For every node it stores the plain bitmap, the RRR lookup table and the RRR-encoded bitmap.
        /// </summary>
        /// <param name="currentAlphabet">
        /// Alphabet covered by this node. With more than two symbols the first (Count + 1) / 2 symbols
        /// are written as 0 and passed to the left child, the rest are written as 1; a right child is
        /// only created for more than three symbols. With exactly two symbols the first is 0 and the
        /// second is 1 and no children are created. With fewer than two symbols nothing is built.
        /// </param>
        /// <param name="currentLabel">Sequence of characters represented by this node.</param>
        /// <param name="currentNode">Node that receives the bitmap and the RRR structures.</param>
        public void buildWaveletTree(ArrayList currentAlphabet, String currentLabel, WaveletNode currentNode)
        {
            if (currentAlphabet.Count > 2)
            {
                StringBuilder bitmapBuilder = new StringBuilder();
                StringBuilder leftLabel     = new StringBuilder();
                StringBuilder rightLabel    = new StringBuilder();

                int mid = (currentAlphabet.Count + 1) / 2;

                // Creation of the node bitmap, splitting the label between the two halves of the alphabet.
                foreach (char c in currentLabel)
                {
                    char symbol = Char.ToUpper(c);

                    if (getIndex(symbol, currentAlphabet) < mid)
                    {
                        bitmapBuilder.Append("0");
                        leftLabel.Append(symbol);
                    }
                    else
                    {
                        bitmapBuilder.Append("1");
                        rightLabel.Append(symbol);
                    }
                }

                currentNode.setBitmap(bitmapBuilder.ToString());
                buildRRRForNode(currentNode, bitmapBuilder);

                currentNode.setLeftChild(new WaveletNode());
                currentNode.getLeftChild().setParent(currentNode);
                buildWaveletTree(currentAlphabet.GetRange(0, mid), leftLabel.ToString(), currentNode.getLeftChild());

                // Only an alphabet with more than 3 characters gets a right child; with exactly 3 the
                // single right-hand character is fully described by the 1-bits of this node.
                if (currentAlphabet.Count > 3)
                {
                    currentNode.setRightChild(new WaveletNode());
                    currentNode.getRightChild().setParent(currentNode);
                    buildWaveletTree(currentAlphabet.GetRange(mid, currentAlphabet.Count - mid),
                                     rightLabel.ToString(), currentNode.getRightChild());
                }
            }
            else if (currentAlphabet.Count == 2)
            {
                // Lowest level: first alphabet symbol is 0, anything else is 1.
                StringBuilder bitmapBuilder = new StringBuilder();

                foreach (char c in currentLabel)
                {
                    if (getIndex(Char.ToUpper(c), currentAlphabet) == 0)
                    {
                        bitmapBuilder.Append("0");
                    }
                    else
                    {
                        bitmapBuilder.Append("1");
                    }
                }

                currentNode.setBitmap(bitmapBuilder.ToString());
                buildRRRForNode(currentNode, bitmapBuilder);
            }
        }

        // Fills node.RRRTable and node.RRRStruct from the node's already-set plain bitmap.
        // bitmapBuilder must hold that same bitmap; it is padded in place with zeroes to a whole
        // number of blocks and stored as the node's help bitmap.
        private void buildRRRForNode(WaveletNode node, StringBuilder bitmapBuilder)
        {
            int    bitmapLength = node.getBitmap().Length;
            double log2Length   = bitmapLength > 0 ? Math.Log(bitmapLength, 2) : 0.0;

            // Short bitmaps would yield blockSize 0 (and a 1-bit bitmap superblockSize 0), which
            // makes the modulo operations below divide by zero. Clamp both so that a block holds at
            // least one bit and a superblock at least one block.
            int blockSize      = Math.Max(1, (int)(log2Length / 2));
            int superblockSize = Math.Max(blockSize, (int)(blockSize * Math.Floor(log2Length)));

            // Creation of the RRR lookup table.
            node.RRRTable.BlockSize       = blockSize;
            node.RRRTable.SuperblockSize  = superblockSize;
            node.RRRTable.ClassBitsNeeded = (int)Math.Floor(Math.Log(blockSize, 2)) + 1;
            node.RRRTable.buildTableG();

            // Pad the bitmap with zeroes so that every block has the same size.
            while (bitmapBuilder.Length % blockSize != 0)
            {
                bitmapBuilder.Append("0");
            }

            // The padded bitmap is the one the RRR structure is built from.
            node.setHelpBitmap(bitmapBuilder.ToString());
            string helpBitmap = node.getHelpBitmap();

            // Creation of the RRR data structure: for every block a (class, offset) pair.
            StringBuilder rrrBitmap     = new StringBuilder();
            int           totalPopcount = 0;

            for (int i = 0; i < helpBitmap.Length; i += blockSize)
            {
                string block      = helpBitmap.Substring(i, blockSize);
                int    blockClass = popcount(block);

                // NOTE(review): this width is 0 when only one block of this class exists, while
                // rankRRR() reads one bit in that case - presumably ToBin() always emits at least
                // one digit; confirm.
                int offsetWidth = (int)Math.Ceiling(Math.Log(GetBinCoeff(blockSize, blockClass), 2));

                // Store the class using the required number of bits, then the block's index in table G.
                rrrBitmap.Append(ToBin(blockClass, node.RRRTable.ClassBitsNeeded));
                rrrBitmap.Append(ToBin(node.RRRTable.TableG[blockClass].IndexOf(block), offsetWidth));
                totalPopcount += blockClass;

                // At the end of every superblock (and at the very end) record the running totals.
                int blockEnd = i + blockSize;
                if (blockEnd % superblockSize == 0 || blockEnd >= helpBitmap.Length)
                {
                    node.RRRStruct.superblockSums.Add(totalPopcount);
                    node.RRRStruct.superblockOffsets.Add(rrrBitmap.Length);
                }
            }

            node.RRRStruct.Bitmap = rrrBitmap.ToString();
        }