/// <summary>
/// Rank query that works on the plain (uncompressed) node bitmaps, without the RRR structure.
/// Walks the wavelet tree from <paramref name="currentNode"/> downwards and counts how many
/// times <paramref name="character"/> occurs in the input sequence up to <paramref name="index"/>.
/// </summary>
/// <param name="currentNode">Node at which the query starts (callers presumably pass the root).</param>
/// <param name="index">
/// Position in the bitmap of <paramref name="currentNode"/>. The arithmetic below
/// (<c>newIndex + 1</c> is returned as a count) treats it as a zero-based, inclusive position.
/// </param>
/// <param name="character">Symbol whose occurrences are counted.</param>
/// <param name="currentAlphabet">Alphabet represented by <paramref name="currentNode"/>.</param>
/// <returns>
/// Number of occurrences of <paramref name="character"/>; 0 when the character is not part of
/// <paramref name="currentAlphabet"/> or when the queried prefix is empty.
/// </returns>
public int rank(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
{
    if (!currentAlphabet.Contains(character))
    {
        return 0;
    }

    // The recursion passes index == -1 when nothing in the queried prefix was routed to this
    // subtree; the answer is then 0 and there is nothing to look up in the bitmap.
    if (index < 0)
    {
        return 0;
    }

    // Same split point as buildWaveletTree(): the first `mid` symbols are encoded as 0.
    int mid = (currentAlphabet.Count + 1) / 2;
    int onesUpToIndex = popcount(currentNode.getBitmap(), index);

    int newIndex;
    ArrayList currentAlphabetSliced;
    if (getIndex(character, currentAlphabet) < mid)
    {
        // Zeros in [0..index] minus one gives the position inside the left child.
        newIndex = index - onesUpToIndex;
        currentNode = currentNode.getLeftChild();
        // Must mirror buildWaveletTree(), which hands the left child exactly the first `mid`
        // symbols. The previous slice (Count - (mid - 1)) was one symbol too long for
        // even-sized alphabets, which made the next level pick the wrong bit for a symbol.
        currentAlphabetSliced = currentAlphabet.GetRange(0, mid);
    }
    else
    {
        // Ones in [0..index] minus one gives the position inside the right child.
        newIndex = onesUpToIndex - 1;
        currentNode = currentNode.getRightChild();
        currentAlphabetSliced = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
    }

    if (currentNode != null)
    {
        return rank(currentNode, newIndex, character, currentAlphabetSliced);
    }

    // No child below: newIndex is the zero-based position of the last match, so the count is +1.
    return newIndex + 1;
}
/// <summary>
/// Select query on the plain (uncompressed) node bitmaps, without the RRR structure.
/// Descends from the root to the node that encodes <paramref name="character"/>, locates the
/// requested occurrence in that node's bitmap and then maps the position back up to the root.
/// </summary>
/// <param name="nthOccurrence">Which occurrence of the character to look for.</param>
/// <param name="character">Symbol to search for.</param>
/// <returns>
/// The position reported at the root minus one, or -1 when
/// <c>getPositionOfNthOccurrence</c> reports 0 for the node that encodes the character.
/// </returns>
public int select(int nthOccurrence, char character)
{
    Interval range = new Interval(0, alphabet.Count - 1);
    WaveletNode node = rootNode;
    int symbolIndex = getIndex(character, alphabet);
    bool encodedAsZero = true;

    // Top-down: narrow the alphabet interval until at most two symbols remain.
    while (range.isGreaterThanTwo())
    {
        // In a three-symbol interval the right-most symbol is stored as 1 in the current node,
        // so the descent stops here.
        if (range.getSize() == 3 && range.getRightIndex() == symbolIndex)
        {
            encodedAsZero = false;
            break;
        }

        if (symbolIndex <= range.getMiddleIndex())
        {
            node = node.getLeftChild();
            range.setRightIndex();
        }
        else
        {
            node = node.getRightChild();
            range.setLeftIndex();
        }
    }

    // Unless decided above, the symbol is a 0 exactly when it is the left end of the interval.
    if (encodedAsZero)
    {
        encodedAsZero = range.getLeftIndex() == symbolIndex;
    }

    int position = getPositionOfNthOccurrence(node.getBitmap(), nthOccurrence, encodedAsZero);
    if (position == 0)
    {
        return -1;
    }

    // Bottom-up: translate the position through every ancestor's bitmap.
    WaveletNode child = node;
    WaveletNode ancestor = node.getParent();
    while (ancestor != null)
    {
        bool cameFromLeft = ancestor.getLeftChild().Equals(child);
        position = getPositionOfNthOccurrence(ancestor.getBitmap(), position, cameFromLeft);

        ancestor = ancestor.getParent();
        child = child.getParent();
    }

    return position - 1;
}
/// <summary>
/// Select query that resolves positions through <c>selectOnBitmap</c> on each node.
/// Descends from the root to the node that encodes <paramref name="character"/>, locates the
/// requested occurrence there and then maps the position back up to the root.
/// </summary>
/// <param name="nthOccurrence">Which occurrence of the character to look for.</param>
/// <param name="character">Symbol to search for.</param>
/// <returns>
/// The position reported at the root minus one, or -1 when <c>selectOnBitmap</c> reports 0
/// for the node that encodes the character (fewer than n occurrences).
/// </returns>
public int selectRRR(int nthOccurrence, char character)
{
    Interval range = new Interval(0, alphabet.Count - 1);
    WaveletNode node = rootNode;
    int symbolIndex = getIndex(character, alphabet);
    bool encodedAsZero = true;

    // Top-down: narrow the alphabet interval until at most two symbols remain.
    while (range.isGreaterThanTwo())
    {
        // In a three-symbol interval the right-most symbol is stored as 1 in the current node,
        // so the descent stops here.
        if (range.getSize() == 3 && range.getRightIndex() == symbolIndex)
        {
            encodedAsZero = false;
            break;
        }

        if (symbolIndex <= range.getMiddleIndex())
        {
            node = node.getLeftChild();
            range.setRightIndex();
        }
        else
        {
            node = node.getRightChild();
            range.setLeftIndex();
        }
    }

    // Unless decided above, the symbol is a 0 exactly when it is the left end of the interval.
    if (encodedAsZero)
    {
        encodedAsZero = range.getLeftIndex() == symbolIndex;
    }

    // Bottom-up traversal starts from the node that represents the character.
    int position = selectOnBitmap(node, nthOccurrence, encodedAsZero);
    if (position == 0)
    {
        return -1; // the character does not occur n times
    }

    WaveletNode child = node;
    WaveletNode ancestor = node.getParent();
    while (ancestor != null)
    {
        bool cameFromLeft = ancestor.getLeftChild().Equals(child);
        position = selectOnBitmap(ancestor, position, cameFromLeft);

        ancestor = ancestor.getParent();
        child = child.getParent();
    }

    return position - 1;
}
/// <summary>
/// Rank query answered from the RRR-encoded bitmaps of the wavelet tree nodes.
/// Walks the tree from <paramref name="currentNode"/> downwards and counts how many times
/// <paramref name="character"/> occurs in the input sequence up to <paramref name="index"/>.
/// </summary>
/// <param name="currentNode">Node at which the query starts (callers presumably pass the root).</param>
/// <param name="index">
/// Position in the bitmap of <paramref name="currentNode"/>. The arithmetic below
/// (<c>newIndex + 1</c> is returned as a count) treats it as a zero-based, inclusive position.
/// </param>
/// <param name="character">Symbol whose occurrences are counted.</param>
/// <param name="currentAlphabet">Alphabet represented by <paramref name="currentNode"/>.</param>
/// <returns>
/// Number of occurrences of <paramref name="character"/>; 0 when the character is not part of
/// <paramref name="currentAlphabet"/> or when the queried prefix is empty.
/// </returns>
public int rankRRR(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
{
    if (!currentAlphabet.Contains(character))
    {
        return 0;
    }

    // The recursion passes index == -1 when nothing in the queried prefix was routed to this
    // subtree. Integer division truncates toward zero, so without this guard a negative index
    // would be decoded as block 0 (or index the superblock arrays out of range) and could
    // report a non-zero rank for an empty prefix.
    if (index < 0)
    {
        return 0;
    }

    int blockSize = currentNode.RRRTable.BlockSize;
    int classBits = currentNode.RRRTable.ClassBitsNeeded;
    int blocksPerSuperblock = currentNode.RRRTable.SuperblockSize / blockSize;
    string rrrBitmap = currentNode.RRRStruct.Bitmap;

    int blockIndex = index / blockSize;
    int superBlockIndex = blockIndex / blocksPerSuperblock;

    // Start from the totals stored for the preceding superblock, if there is one.
    int shift = 0;
    int bitsSum = 0;
    if (superBlockIndex > 0)
    {
        shift = currentNode.RRRStruct.superblockOffsets[superBlockIndex - 1];
        bitsSum = currentNode.RRRStruct.superblockSums[superBlockIndex - 1];
    }

    // Skip the whole blocks between the superblock start and the block holding `index`;
    // the class field of each block is its popcount.
    int blocksRemaining = blockIndex - blocksPerSuperblock * superBlockIndex;
    for (int i = 0; i < blocksRemaining; i++)
    {
        int klass = Convert.ToInt32(rrrBitmap.Substring(shift, classBits), 2);
        bitsSum += klass;
        shift += classBits + getRRROffsetBitCount(blockSize, klass);
    }

    // Decode the block that contains `index` and count its ones up to the in-block position.
    int lastClass = Convert.ToInt32(rrrBitmap.Substring(shift, classBits), 2);
    int lastOffsetBits = getRRROffsetBitCount(blockSize, lastClass);
    int lastOffset = Convert.ToInt32(rrrBitmap.Substring(shift + classBits, lastOffsetBits), 2);
    bitsSum += popcount(currentNode.RRRTable.TableG[lastClass][lastOffset], index % blockSize);

    // Same split point as buildWaveletTree(): the first `mid` symbols are encoded as 0.
    int mid = (currentAlphabet.Count + 1) / 2;
    int newIndex;
    ArrayList currentAlphabetSliced;
    if (getIndex(character, currentAlphabet) < mid)
    {
        newIndex = index - bitsSum;
        currentNode = currentNode.getLeftChild();
        // Must mirror buildWaveletTree(), which hands the left child exactly the first `mid`
        // symbols. The previous slice (Count - (mid - 1)) was one symbol too long for
        // even-sized alphabets, which made the next level pick the wrong bit for a symbol.
        currentAlphabetSliced = currentAlphabet.GetRange(0, mid);
    }
    else
    {
        newIndex = bitsSum - 1;
        currentNode = currentNode.getRightChild();
        currentAlphabetSliced = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
    }

    if (currentNode != null)
    {
        return rankRRR(currentNode, newIndex, character, currentAlphabetSliced);
    }

    // No child below: newIndex is the zero-based position of the last match, so the count is +1.
    return newIndex + 1;
}

// Width in bits of the offset field of a block with the given class; a computed width of 0
// is read as 1 bit, exactly as the decoder did before this helper was extracted.
private int getRRROffsetBitCount(int blockSize, int klass)
{
    int bits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, klass)), 2));
    if (bits == 0)
    {
        bits = 1;
    }
    return bits;
}
/// <summary>
/// Recursively constructs the wavelet tree. For every node it stores the plain bitmap,
/// configures and builds the node's RRR lookup table, and attaches the RRR-encoded bitmap
/// together with its superblock sums and offsets.
/// </summary>
/// <param name="currentAlphabet">
/// Alphabet covered by <paramref name="currentNode"/>. Fewer than two symbols: nothing is built.
/// Exactly two: the node is filled but gets no children. Three: only a left child is created.
/// More than three: both children are created.
/// </param>
/// <param name="currentLabel">Part of the input sequence routed to this node (upper-cased per character).</param>
/// <param name="currentNode">Node to fill; child nodes are created and linked here.</param>
public void buildWaveletTree(ArrayList currentAlphabet, String currentLabel, WaveletNode currentNode)
{
    int alphabetSize = currentAlphabet.Count;
    if (alphabetSize < 2)
    {
        return;
    }

    bool isLeafLevel = alphabetSize == 2;
    int mid = (alphabetSize + 1) / 2;

    StringBuilder bitmapBuilder = new StringBuilder();
    StringBuilder leftLabel = new StringBuilder();
    StringBuilder rightLabel = new StringBuilder();

    // Node bitmap: 0 for symbols of the first half of the alphabet, 1 for the rest.
    foreach (char c in currentLabel)
    {
        char symbol = Char.ToUpper(c);
        int symbolIndex = getIndex(symbol, currentAlphabet);
        // Two-symbol nodes keep their original test (index == 0) so that whatever getIndex
        // returns for an unknown symbol is encoded the same way as before.
        bool encodedAsZero = isLeafLevel ? symbolIndex == 0 : symbolIndex < mid;

        if (encodedAsZero)
        {
            bitmapBuilder.Append("0");
            if (!isLeafLevel)
            {
                leftLabel.Append(symbol);
            }
        }
        else
        {
            bitmapBuilder.Append("1");
            if (!isLeafLevel)
            {
                rightLabel.Append(symbol);
            }
        }
    }

    currentNode.setBitmap(bitmapBuilder.ToString());
    buildRRRStructure(currentNode, bitmapBuilder);

    if (isLeafLevel)
    {
        return;
    }

    currentNode.setLeftChild(new WaveletNode());
    currentNode.getLeftChild().setParent(currentNode);
    buildWaveletTree(currentAlphabet.GetRange(0, mid), leftLabel.ToString(), currentNode.getLeftChild());

    // With exactly three symbols the right half is a single symbol and needs no node.
    if (alphabetSize > 3)
    {
        currentNode.setRightChild(new WaveletNode());
        currentNode.getRightChild().setParent(currentNode);
        buildWaveletTree(currentAlphabet.GetRange(mid, alphabetSize - mid), rightLabel.ToString(), currentNode.getRightChild());
    }
}

// Configures the node's RRR lookup table from the bitmap length, pads the bitmap to a whole
// number of blocks and encodes it block by block into the node's RRR structure.
private void buildRRRStructure(WaveletNode node, StringBuilder bitmapBuilder)
{
    int bitmapLength = node.getBitmap().Length;

    // Block size is log2(n)/2 and superblock size is blockSize * floor(log2(n)). Both are
    // clamped so that neither can be 0 (or derived from log(0)) for bitmaps shorter than
    // 4 bits; both are used as divisors below and in the rank query.
    double log2Length = bitmapLength > 0 ? Math.Log(bitmapLength, 2) : 0;
    int blockSize = Math.Max(1, (int)(log2Length / 2));
    int superblockSize = Math.Max(blockSize, (int)(blockSize * Math.Floor(log2Length)));

    node.RRRTable.BlockSize = blockSize;
    node.RRRTable.SuperblockSize = superblockSize;
    node.RRRTable.ClassBitsNeeded = (int)Math.Floor(Math.Log(blockSize, 2)) + 1;
    node.RRRTable.buildTableG();

    // Pad with zeros so that every block has the same size.
    while (bitmapBuilder.Length % blockSize != 0)
    {
        bitmapBuilder.Append("0");
    }

    // The padded bitmap is the input of the RRR encoding.
    node.setHelpBitmap(bitmapBuilder.ToString());
    string paddedBitmap = node.getHelpBitmap();

    StringBuilder rrrBitmap = new StringBuilder();
    int totalPopcount = 0;
    for (int i = 0; i < paddedBitmap.Length; i += blockSize)
    {
        string block = paddedBitmap.Substring(i, blockSize);
        int popCount = popcount(block);

        // Store the class (popcount of the block) using the required number of bits ...
        rrrBitmap.Append(ToBin(popCount, node.RRRTable.ClassBitsNeeded));
        // ... followed by the block's offset inside its class in table G.
        int offsetBits = (int)Math.Ceiling(Math.Log((GetBinCoeff(blockSize, popCount)), 2));
        rrrBitmap.Append(ToBin(node.RRRTable.TableG[popCount].IndexOf(block), offsetBits));

        totalPopcount += popCount;

        // Record running totals at every superblock boundary and after the final block.
        int blockEnd = i + blockSize;
        if ((blockEnd % superblockSize) == 0 || blockEnd >= paddedBitmap.Length)
        {
            node.RRRStruct.superblockSums.Add(totalPopcount);
            node.RRRStruct.superblockOffsets.Add(rrrBitmap.Length);
        }
    }

    node.RRRStruct.Bitmap = rrrBitmap.ToString();
}