コード例 #1
0
        /// <summary>
        /// Rank query on the plain (non-RRR) bitmaps: walks down the wavelet tree from
        /// <paramref name="currentNode"/> and returns how many times <paramref name="character"/>
        /// occurs in the node's sequence at positions 0..<paramref name="index"/>.
        /// </summary>
        /// <param name="currentNode">Node whose bitmap is consulted at this level.</param>
        /// <param name="index">Zero-based position in the sequence represented by <paramref name="currentNode"/>.</param>
        /// <param name="character">Symbol to count.</param>
        /// <param name="currentAlphabet">Alphabet covered by <paramref name="currentNode"/>.</param>
        /// <returns>
        /// The occurrence count; 0 when the symbol is not in the alphabet or <paramref name="index"/> is negative.
        /// </returns>
        public int rank(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
        {
            // A negative index is produced by the recursion itself when no bit of the
            // requested kind precedes the position; the answer is then 0.
            if (index < 0 || !currentAlphabet.Contains(character))
            {
                return 0;
            }

            // Same split point as buildWaveletTree(): the first 'mid' symbols are encoded as 0.
            int mid = (currentAlphabet.Count + 1) / 2;

            // NOTE(review): assumes popcount(bitmap, i) counts the 1-bits at positions 0..i inclusive - confirm.
            int onesUpToIndex = popcount(currentNode.getBitmap(), index);

            int       newIndex;
            ArrayList currentAlphabetSliced;

            if (getIndex(character, currentAlphabet) < mid)
            {
                // Position of the last 0-bit at or before 'index', expressed in the child's coordinates.
                newIndex    = index - onesUpToIndex;
                currentNode = currentNode.getLeftChild();
                // Must be exactly the range used when the tree was built. The previous
                // Count - (mid - 1) length kept one symbol too many for even-sized alphabets.
                currentAlphabetSliced = currentAlphabet.GetRange(0, mid);
            }
            else
            {
                newIndex              = onesUpToIndex - 1;
                currentNode           = currentNode.getRightChild();
                currentAlphabetSliced = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
            }

            if (currentNode != null)
            {
                return rank(currentNode, newIndex, character, currentAlphabetSliced);
            }

            // No child on this side: newIndex is the zero-based position of the last match, so the count is newIndex + 1.
            return newIndex + 1;
        }
コード例 #2
0
 /// <summary>
 /// Stores <paramref name="parent"/> as this node's parent reference.
 /// Only the field is assigned; the parent's child links are not touched here.
 /// </summary>
 /// <param name="parent">Node to record as the parent; stored as given.</param>
 public void setParent(WaveletNode parent)
 {
     this.parent = parent;
 }
コード例 #3
0
 /// <summary>
 /// Stores <paramref name="child"/> as this node's right child.
 /// Only the field is assigned; the child's parent link is not updated here.
 /// </summary>
 /// <param name="child">Node to attach on the right; stored as given.</param>
 public void setRightChild(WaveletNode child)
 {
     rightChild = child;
 }
コード例 #4
0
 /// <summary>
 /// Stores <paramref name="child"/> as this node's left child.
 /// Only the field is assigned; the child's parent link is not updated here.
 /// </summary>
 /// <param name="child">Node to attach on the left; stored as given.</param>
 public void setLeftChild(WaveletNode child)
 {
     leftChild = child;
 }
コード例 #5
0
        /// <summary>
        /// Select query on the plain (non-RRR) bitmaps: returns the zero-based index of the
        /// <paramref name="nthOccurrence"/>-th occurrence of <paramref name="character"/> in the input sequence.
        /// </summary>
        /// <param name="nthOccurrence">Which occurrence to locate (1 = first).</param>
        /// <param name="character">Symbol to locate.</param>
        /// <returns>
        /// The zero-based index, or -1 when the symbol is not in the alphabet or
        /// getPositionOfNthOccurrence() reports 0 for the leaf-level lookup.
        /// </returns>
        public int select(int nthOccurrence, char character)
        {
            // rank() rejects symbols outside the alphabet; without the same check the descent
            // below would end at an unrelated node and return a position for another symbol.
            if (!alphabet.Contains(character))
            {
                return -1;
            }

            Interval    alphabeticInterval           = new Interval(0, alphabet.Count - 1);
            WaveletNode currentNode                  = rootNode;
            int         indexOfCharInAlph            = getIndex(character, alphabet);
            bool        characterRepresentedWithZero = true;

            // Top-down: narrow the alphabet interval until the node that encodes the symbol is reached.
            while (alphabeticInterval.isGreaterThanTwo())
            {
                // With three symbols left, the right-most one has no node of its own:
                // it is the 1-bit of the current node.
                if (alphabeticInterval.getSize() == 3 && alphabeticInterval.getRightIndex() == indexOfCharInAlph)
                {
                    characterRepresentedWithZero = false;
                    break;
                }

                if (indexOfCharInAlph <= alphabeticInterval.getMiddleIndex())
                {
                    currentNode = currentNode.getLeftChild();
                    alphabeticInterval.setRightIndex();
                }
                else
                {
                    currentNode = currentNode.getRightChild();
                    alphabeticInterval.setLeftIndex();
                }
            }

            // Unless already decided above, the symbol is a 0-bit exactly when it is the left end of the interval.
            if (characterRepresentedWithZero)
            {
                characterRepresentedWithZero = alphabeticInterval.getLeftIndex() == indexOfCharInAlph;
            }

            int position = getPositionOfNthOccurrence(currentNode.getBitmap(), nthOccurrence, characterRepresentedWithZero);

            if (position == 0)
            {
                return -1; // fewer than nthOccurrence occurrences
            }

            // Bottom-up: translate the position into each ancestor's bitmap.
            // A left child corresponds to the parent's 0-bits, a right child to its 1-bits.
            WaveletNode child  = currentNode;
            WaveletNode parent = currentNode.getParent();

            while (parent != null)
            {
                bool cameFromLeft = parent.getLeftChild().Equals(child);
                position = getPositionOfNthOccurrence(parent.getBitmap(), position, cameFromLeft);

                child  = parent;
                parent = parent.getParent();
            }

            // Positions are 1-based while the result is a zero-based index.
            return position - 1;
        }
コード例 #6
0
        /// <summary>
        /// Select on a single node using its RRR structure: returns the 1-based position of the
        /// <paramref name="index"/>-th 0-bit or 1-bit in the node's bitmap.
        /// </summary>
        /// <param name="currentNode">Node whose RRR structure is searched.</param>
        /// <param name="index">Which matching bit to locate (1 = first).</param>
        /// <param name="characterRepresentedWithZero">true to look for 0-bits, false to look for 1-bits.</param>
        /// <returns>
        /// The 1-based position, or 0 when <paramref name="index"/> is not positive or the bitmap
        /// holds fewer than <paramref name="index"/> bits of the requested kind.
        /// </returns>
        int selectOnBitmap(WaveletNode currentNode, int index, bool characterRepresentedWithZero)
        {
            if (index <= 0)
            {
                return 0;
            }

            // Reject requests beyond the available bits up front. Without this the scans below
            // read past the end of the encoded bitmap (Substring throws) or, for 0-bits, land on
            // the zero padding appended to fill the last block.
            string plainBitmap  = currentNode.getBitmap();
            int    bitmapLength = plainBitmap == null ? 0 : plainBitmap.Length;
            int    sumsCount    = currentNode.RRRStruct.superblockSums.Count;
            int    totalOnes    = sumsCount == 0 ? 0 : currentNode.RRRStruct.superblockSums[sumsCount - 1];
            int    available    = characterRepresentedWithZero ? bitmapLength - totalOnes : totalOnes;

            if (index > available)
            {
                return 0;
            }

            int blockSize      = currentNode.RRRTable.BlockSize;
            int superblockSize = currentNode.RRRTable.SuperblockSize;
            int classBits      = currentNode.RRRTable.ClassBitsNeeded;

            int matched   = 0; // matching bits counted so far
            int position  = 0; // bits passed so far
            int bitOffset = 0; // read offset into the encoded RRR bitmap

            // 1) Skip whole superblocks that end before the requested bit.
            for (int i = 0; i < currentNode.RRRStruct.superblockOffsets.Count; i++)
            {
                int onesSoFar   = currentNode.RRRStruct.superblockSums[i];
                int zeroesSoFar = (i + 1) * superblockSize - onesSoFar;
                int matchesHere = characterRepresentedWithZero ? zeroesSoFar : onesSoFar;

                if (matchesHere >= index)
                {
                    break;
                }

                position  = superblockSize * (i + 1); // number of bits passed so far
                matched   = matchesHere;
                bitOffset = currentNode.RRRStruct.superblockOffsets[i];
            }

            // 2) Skip whole blocks inside the superblock, using only each block's class (its popcount).
            int klass;
            int offsetBits;

            for (int i = 0; i < superblockSize / blockSize; i++)
            {
                klass = Convert.ToInt32(currentNode.RRRStruct.Bitmap.Substring(bitOffset, classBits), 2);

                int matchesInBlock = characterRepresentedWithZero ? blockSize - klass : klass;

                if (matched + matchesInBlock >= index)
                {
                    break;
                }

                matched += matchesInBlock;

                offsetBits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, klass)), 2));
                if (offsetBits == 0)
                {
                    offsetBits = 1; // the reader treats the offset field as at least one bit wide
                }
                bitOffset = bitOffset + classBits + offsetBits;
                position += blockSize;
            }

            // 3) Decode the block that contains the requested bit and scan it bit by bit.
            int lastClass = Convert.ToInt32(currentNode.RRRStruct.Bitmap.Substring(bitOffset, classBits), 2);

            offsetBits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, lastClass)), 2));
            if (offsetBits == 0)
            {
                offsetBits = 1;
            }

            int    lastOffset = Convert.ToInt32(currentNode.RRRStruct.Bitmap.Substring(bitOffset + classBits, offsetBits), 2);
            string lastBlock  = currentNode.RRRTable.TableG[lastClass][lastOffset];
            char   wantedBit  = characterRepresentedWithZero ? '0' : '1';

            for (int i = 0; i < lastBlock.Length; i++)
            {
                if (matched == index)
                {
                    break;
                }
                position++;
                if (lastBlock[i] == wantedBit)
                {
                    matched++;
                }
            }

            return position;
        }
コード例 #7
0
        /// <summary>
        /// Select query using the RRR structures: returns the zero-based index of the
        /// <paramref name="nthOccurrence"/>-th occurrence of <paramref name="character"/> in the input sequence.
        /// </summary>
        /// <param name="nthOccurrence">Which occurrence to locate (1 = first).</param>
        /// <param name="character">Symbol to locate.</param>
        /// <returns>
        /// The zero-based index, or -1 when the symbol is not in the alphabet or
        /// selectOnBitmap() reports 0 for the leaf-level lookup.
        /// </returns>
        public int selectRRR(int nthOccurrence, char character)
        {
            // rankRRR() rejects symbols outside the alphabet; without the same check the descent
            // below would end at an unrelated node and return a position for another symbol.
            if (!alphabet.Contains(character))
            {
                return -1;
            }

            Interval    alphabeticInterval           = new Interval(0, alphabet.Count - 1);
            WaveletNode currentNode                  = rootNode;
            int         indexOfCharInAlph            = getIndex(character, alphabet);
            bool        characterRepresentedWithZero = true;

            // Top-down: narrow the alphabet interval until the node that encodes the symbol is reached.
            while (alphabeticInterval.isGreaterThanTwo())
            {
                // With three symbols left, the right-most one has no node of its own:
                // it is the 1-bit of the current node.
                if (alphabeticInterval.getSize() == 3 && alphabeticInterval.getRightIndex() == indexOfCharInAlph)
                {
                    characterRepresentedWithZero = false;
                    break;
                }

                if (indexOfCharInAlph <= alphabeticInterval.getMiddleIndex())
                {
                    currentNode = currentNode.getLeftChild();
                    alphabeticInterval.setRightIndex();
                }
                else
                {
                    currentNode = currentNode.getRightChild();
                    alphabeticInterval.setLeftIndex();
                }
            }

            // Unless already decided above, the symbol is a 0-bit exactly when it is the left end of the interval.
            if (characterRepresentedWithZero)
            {
                characterRepresentedWithZero = alphabeticInterval.getLeftIndex() == indexOfCharInAlph;
            }

            int position = selectOnBitmap(currentNode, nthOccurrence, characterRepresentedWithZero);

            if (position == 0)
            {
                return -1; // no n occurrences of character
            }

            // Bottom-up: translate the position into each ancestor's bitmap.
            // A left child corresponds to the parent's 0-bits, a right child to its 1-bits.
            WaveletNode child  = currentNode;
            WaveletNode parent = currentNode.getParent();

            while (parent != null)
            {
                bool cameFromLeft = parent.getLeftChild().Equals(child);
                position = selectOnBitmap(parent, position, cameFromLeft);

                child  = parent;
                parent = parent.getParent();
            }

            // Positions are 1-based while the result is a zero-based index.
            return position - 1;
        }
コード例 #8
0
        /// <summary>
        /// Rank query using the RRR structures: walks down the wavelet tree from
        /// <paramref name="currentNode"/> and returns how many times <paramref name="character"/>
        /// occurs in the node's sequence at positions 0..<paramref name="index"/>.
        /// </summary>
        /// <param name="currentNode">Node whose RRR structure is consulted at this level.</param>
        /// <param name="index">Zero-based position in the sequence represented by <paramref name="currentNode"/>.</param>
        /// <param name="character">Symbol to count.</param>
        /// <param name="currentAlphabet">Alphabet covered by <paramref name="currentNode"/>.</param>
        /// <returns>
        /// The occurrence count; 0 when the symbol is not in the alphabet or <paramref name="index"/> is negative.
        /// </returns>
        public int rankRRR(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
        {
            // A negative index is produced by the recursion itself when no bit of the requested
            // kind precedes the position. Answer 0 directly: with a block size of 1 the division
            // below would otherwise give a negative block index and an out-of-range lookup.
            if (index < 0 || !currentAlphabet.Contains(character))
            {
                return 0;
            }

            int    blockSize           = currentNode.RRRTable.BlockSize;
            int    classBits           = currentNode.RRRTable.ClassBitsNeeded;
            int    blocksPerSuperblock = currentNode.RRRTable.SuperblockSize / blockSize;
            string encoded             = currentNode.RRRStruct.Bitmap;

            int blockIndex      = index / blockSize;
            int superBlockIndex = blockIndex / blocksPerSuperblock;

            // Start from the totals stored at the end of the preceding superblock, if there is one.
            int shift   = 0; // read offset into the encoded RRR bitmap
            int bitsSum = 0; // 1-bits counted so far
            if (superBlockIndex != 0)
            {
                shift   = currentNode.RRRStruct.superblockOffsets[superBlockIndex - 1];
                bitsSum = currentNode.RRRStruct.superblockSums[superBlockIndex - 1];
            }

            int blocksRemaining = blockIndex - blocksPerSuperblock * superBlockIndex;
            int klass;
            int offsetBits;

            // Whole blocks before the target block contribute their class (popcount) only.
            for (int i = 0; i < blocksRemaining; i++)
            {
                klass      = Convert.ToInt32(encoded.Substring(shift, classBits), 2);
                bitsSum   += klass;
                offsetBits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, klass)), 2));
                if (offsetBits == 0)
                {
                    offsetBits = 1; // the reader treats the offset field as at least one bit wide
                }
                shift = shift + classBits + offsetBits;
            }

            // Decode the block containing 'index' and count its 1-bits up to the in-block position.
            int lastClass = Convert.ToInt32(encoded.Substring(shift, classBits), 2);

            offsetBits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, lastClass)), 2));
            if (offsetBits == 0)
            {
                offsetBits = 1;
            }
            int lastOffset = Convert.ToInt32(encoded.Substring(shift + classBits, offsetBits), 2);
            bitsSum += popcount(currentNode.RRRTable.TableG[lastClass][lastOffset], index % blockSize);

            // Same split point as buildWaveletTree(): the first 'mid' symbols are encoded as 0.
            int       mid = (currentAlphabet.Count + 1) / 2;
            int       newIndex;
            ArrayList currentAlphabetSliced;

            if (getIndex(character, currentAlphabet) < mid)
            {
                newIndex    = index - bitsSum;
                currentNode = currentNode.getLeftChild();
                // Must be exactly the range used when the tree was built. The previous
                // Count - (mid - 1) length kept one symbol too many for even-sized alphabets.
                currentAlphabetSliced = currentAlphabet.GetRange(0, mid);
            }
            else
            {
                newIndex              = bitsSum - 1;
                currentNode           = currentNode.getRightChild();
                currentAlphabetSliced = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
            }

            if (currentNode != null)
            {
                return rankRRR(currentNode, newIndex, character, currentAlphabetSliced);
            }

            // No child on this side: newIndex is the zero-based position of the last match, so the count is newIndex + 1.
            return newIndex + 1;
        }
コード例 #9
0
        /// <summary>
        /// Reads a FASTA file, builds the wavelet tree with RRR structures for its sequence and runs
        /// one rank or select query, writing progress, timings and the result to the console.
        /// </summary>
        /// <param name="args">
        /// Exactly four values: [0] path passed to readFile(), [1] operation ('S' for select or
        /// 'R' for rank, case-insensitive), [2] the query character, [3] the integer query argument.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="args"/> is null, does not hold four values, or values 1..3 cannot be parsed.
        /// </exception>
        public WaveletTreeRRR(string[] args)
        {
            alphabet = new ArrayList();
            rootNode = new WaveletNode();

            if (args == null || args.Length != 4)
            {
                throw new ArgumentException("Error. Check your arguments.");
            }

            // The stopwatch measures the time taken by each key step.
            Stopwatch stopWatch = new Stopwatch();

            // FASTA file reading
            stopWatch.Restart();
            readFile(args[0]);
            stopWatch.Stop();

            Console.WriteLine("Comments are: " + commentLines);
            Console.WriteLine("FASTA sequence has " + originalSequence.Length + " characters");
            Console.WriteLine("Reading of whole FASTA file was taking " + stopWatch.Elapsed.TotalMilliseconds + " ms");

            // determination of the original sequence alphabet
            stopWatch.Restart();
            getAlphabet();
            stopWatch.Stop();
            Console.WriteLine("Getting and sorting the FASTA sequence alphabet was taking " + stopWatch.Elapsed.TotalMilliseconds + " ms");

            // wavelet tree construction
            stopWatch.Restart();
            buildWaveletTree(alphabet, originalSequence, rootNode);
            stopWatch.Stop();
            Console.WriteLine("Wavelet tree was made in " + stopWatch.Elapsed.TotalMilliseconds + " ms");

            // loading the query arguments
            try
            {
                operation  = Char.ToUpper(Char.Parse(args[1]));
                charachter = Char.ToUpper(Char.Parse(args[2]));
                boundary   = int.Parse(args[3]);
            }
            catch (Exception e)
            {
                // Keep the original failure as the inner exception so the offending value can be diagnosed.
                throw new ArgumentException("Error loading arguments", e);
            }

            if (operation == 'S')
            {
                // select(c, i): time the query only, not the console write
                stopWatch.Restart();
                int selectResult = selectRRR(boundary, charachter);
                stopWatch.Stop();

                Console.WriteLine("Select operation using RRR data structure for (" + charachter + ", " + boundary + ") gave result " + selectResult);
                Console.WriteLine("Operation lasted for " + stopWatch.Elapsed.TotalMilliseconds + " ms");
            }
            else if (operation == 'R')
            {
                // rank(c, i): time the query only, not the console write
                stopWatch.Restart();
                int rankResult = rankRRR(rootNode, boundary, charachter, alphabet);
                stopWatch.Stop();

                Console.WriteLine("Rank operation using RRR data structure for (" + charachter + ", " + boundary + ") gave result " + rankResult);
                Console.WriteLine("Operation lasted for " + stopWatch.Elapsed.TotalMilliseconds + " ms");
            }
            else
            {
                Console.WriteLine("'" + operation + "' operation does not exist!");
            }
        }
コード例 #10
0
        /// <summary>
        /// Recursively builds the wavelet tree below <paramref name="currentNode"/>. Every node that
        /// covers two or more symbols gets its plain bitmap, an RRR lookup table and an RRR structure.
        /// </summary>
        /// <param name="currentAlphabet">Symbols covered by <paramref name="currentNode"/>.</param>
        /// <param name="currentLabel">Sequence restricted to those symbols; characters are upper-cased before lookup.</param>
        /// <param name="currentNode">Node to fill in; children are created and linked as needed.</param>
        public void buildWaveletTree(ArrayList currentAlphabet, String currentLabel, WaveletNode currentNode)
        {
            if (currentAlphabet.Count > 2)
            {
                StringBuilder bitmapBuilder = new StringBuilder();
                StringBuilder leftLabel     = new StringBuilder();
                StringBuilder rightLabel    = new StringBuilder();

                // The first 'mid' symbols are encoded as 0 and go left, the rest as 1 and go right.
                int mid = (currentAlphabet.Count + 1) / 2;

                foreach (char c in currentLabel)
                {
                    char symbol = Char.ToUpper(c);

                    if (getIndex(symbol, currentAlphabet) < mid)
                    {
                        bitmapBuilder.Append('0');
                        leftLabel.Append(symbol);
                    }
                    else
                    {
                        bitmapBuilder.Append('1');
                        rightLabel.Append(symbol);
                    }
                }

                buildRRRForNode(currentNode, bitmapBuilder);

                currentNode.setLeftChild(new WaveletNode());
                currentNode.getLeftChild().setParent(currentNode);
                buildWaveletTree(currentAlphabet.GetRange(0, mid), leftLabel.ToString(), currentNode.getLeftChild());

                // With exactly three symbols the right side holds a single symbol and needs no node.
                if (currentAlphabet.Count > 3)
                {
                    currentNode.setRightChild(new WaveletNode());
                    currentNode.getRightChild().setParent(currentNode);
                    buildWaveletTree(currentAlphabet.GetRange(mid, currentAlphabet.Count - mid),
                                     rightLabel.ToString(), currentNode.getRightChild());
                }
            }
            else if (currentAlphabet.Count == 2)
            {
                // Two-symbol node: the first symbol is 0, the second is 1; no children are created.
                StringBuilder bitmapBuilder = new StringBuilder();

                foreach (char c in currentLabel)
                {
                    bitmapBuilder.Append(getIndex(Char.ToUpper(c), currentAlphabet) == 0 ? '0' : '1');
                }

                buildRRRForNode(currentNode, bitmapBuilder);
            }
        }

        // Stores the bitmap held in bitmapBuilder on the node, then derives the node's RRR lookup
        // table and RRR structure from it. bitmapBuilder is padded in place with zeroes.
        private void buildRRRForNode(WaveletNode node, StringBuilder bitmapBuilder)
        {
            node.setBitmap(bitmapBuilder.ToString());

            // Block size is log2(n)/2 and superblock size is blockSize * floor(log2(n)).
            // Both are clamped so short bitmaps never produce a zero divisor; previously the
            // two-symbol branch had no such guard and failed on bitmaps shorter than four bits.
            int    bitmapLength   = node.getBitmap().Length;
            double log2Length     = bitmapLength > 1 ? Math.Log(bitmapLength, 2) : 0.0;
            int    blockSize      = Math.Max(1, (int)(log2Length / 2));
            int    superblockSize = Math.Max(blockSize, (int)(blockSize * Math.Floor(log2Length)));

            // creation of the RRR lookup table
            node.RRRTable.BlockSize       = blockSize;
            node.RRRTable.SuperblockSize  = superblockSize;
            node.RRRTable.ClassBitsNeeded = (int)Math.Floor(Math.Log(blockSize, 2)) + 1;
            node.RRRTable.buildTableG();

            // pad the bitmap with zeroes so that every block has the same size
            while (bitmapBuilder.Length % blockSize != 0)
            {
                bitmapBuilder.Append('0');
            }

            // the padded bitmap is the input for the RRR encoding
            node.setHelpBitmap(bitmapBuilder.ToString());
            string paddedBitmap = node.getHelpBitmap();

            StringBuilder rrrBitmap     = new StringBuilder();
            int           totalPopcount = 0;

            for (int start = 0; start < paddedBitmap.Length; start += blockSize)
            {
                string block       = paddedBitmap.Substring(start, blockSize);
                int    blockClass  = popcount(block);
                int    offsetWidth = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, blockClass)), 2));

                // store the class using the required number of bits, then the block's offset within that class
                rrrBitmap.Append(ToBin(blockClass, node.RRRTable.ClassBitsNeeded));
                rrrBitmap.Append(ToBin(node.RRRTable.TableG[blockClass].IndexOf(block), offsetWidth));
                totalPopcount += blockClass;

                // record the running totals at every superblock boundary and after the final block
                int end = start + blockSize;
                if (end % superblockSize == 0 || end >= paddedBitmap.Length)
                {
                    node.RRRStruct.superblockSums.Add(totalPopcount);
                    node.RRRStruct.superblockOffsets.Add(rrrBitmap.Length);
                }
            }

            node.RRRStruct.Bitmap = rrrBitmap.ToString();
        }