// rank() without the RRR data structure: it walks the wavelet tree downwards using the plain
// node bitmaps and returns the number of occurrences of the given character in the input
// nucleotide sequence.
//
// currentNode     - node to start from (the root for a top-level call).
// index           - position inside currentNode's bitmap up to which occurrences are counted.
//                   NOTE(review): the -1 / +1 arithmetic below suggests a 0-based, inclusive
//                   index and a popcount(bitmap, index) that counts 1-bits in [0..index] - confirm.
// character       - symbol whose occurrences are counted.
// currentAlphabet - alphabet covered by currentNode.
//
// Returns 0 when the character is not part of currentAlphabet.
public int rank(WaveletNode currentNode, int index, char character, ArrayList currentAlphabet)
{
    if (!currentAlphabet.Contains(character))
    {
        return 0;
    }

    // Same split point as buildWaveletTree(): the first `mid` symbols are encoded as 0 (left),
    // the remaining ones as 1 (right).
    int mid = (currentAlphabet.Count + 1) / 2;

    int newIndex;
    WaveletNode nextNode;
    ArrayList nextAlphabet;

    if (getIndex(character, currentAlphabet) < mid)
    {
        // Character is encoded as 0: its index in the left child is derived from the zeros seen so far.
        newIndex = index - popcount(currentNode.getBitmap(), index);
        nextNode = currentNode.getLeftChild();
        // The left child covers exactly the first `mid` symbols. The previous slice length,
        // Count - (mid - 1), equals `mid` only for odd alphabet sizes; for even sizes it passed one
        // symbol too many, which shifted the split point on the next level and made rank() count
        // the wrong bit for some symbols (e.g. the second symbol of a 4-symbol alphabet).
        nextAlphabet = currentAlphabet.GetRange(0, mid);
    }
    else
    {
        // Character is encoded as 1: its index in the right child is derived from the ones seen so far.
        newIndex = popcount(currentNode.getBitmap(), index) - 1;
        nextNode = currentNode.getRightChild();
        nextAlphabet = currentAlphabet.GetRange(mid, currentAlphabet.Count - mid);
    }

    if (nextNode != null)
    {
        return rank(nextNode, newIndex, character, nextAlphabet);
    }

    // No child on this side: newIndex is the 0-based index of the last counted occurrence,
    // so the number of occurrences is newIndex + 1.
    return newIndex + 1;
}
// select() returns the index of the n-th occurrence of the given character in the input
// nucleotide string, or -1 when the lookup in the lowest node reports position 0.
// It works on the regular node bitmaps, not on the RRR data structure.
//
// The method first descends from the root to the node in which the character is told apart
// from its neighbour, then climbs back to the root, translating the position found in each
// node into a position in its parent's bitmap.
public int select(int nthOccurrence, char character)
{
    Interval range = new Interval(0, alphabet.Count - 1);
    WaveletNode node = rootNode;
    int charIndex = getIndex(character, alphabet);
    bool isZeroBit = true;

    // Descend while the interval still spans more than two symbols.
    while (range.isGreaterThanTwo())
    {
        // In a three-symbol interval the rightmost symbol is the 1-bit of this very node,
        // so the descent stops here.
        if (range.getSize() == 3 && range.getRightIndex() == charIndex)
        {
            isZeroBit = false;
            break;
        }

        if (charIndex > range.getMiddleIndex())
        {
            node = node.getRightChild();
            range.setLeftIndex();
        }
        else
        {
            node = node.getLeftChild();
            range.setRightIndex();
        }
    }

    // Unless the loop already decided on a 1-bit, the character is a 0-bit exactly when it
    // sits at the left end of the remaining interval.
    if (isZeroBit)
    {
        isZeroBit = range.getLeftIndex() == charIndex;
    }

    int position = getPositionOfNthOccurrence(node.getBitmap(), nthOccurrence, isZeroBit);
    if (position == 0)
    {
        return -1;
    }

    // Climb towards the root: a left child corresponds to 0-bits in its parent, any other
    // child to 1-bits.
    WaveletNode child = node;
    for (WaveletNode parent = node.getParent();
         parent != null;
         parent = parent.getParent(), child = child.getParent())
    {
        bool cameFromLeft = parent.getLeftChild().Equals(child);
        position = getPositionOfNthOccurrence(parent.getBitmap(), position, cameFromLeft);
    }

    // Positions are handled 1-based above (0 means "not found"); the result is 0-based.
    return position - 1;
}
// buildWaveletTree() recursively constructs the wavelet tree below currentNode. For every node
// it stores the plain bitmap, builds the node's RRR lookup table and attaches the RRR data
// structure.
//
// currentAlphabet - symbols covered by currentNode.
// currentLabel    - the part of the input sequence that reaches currentNode; characters are
//                   upper-cased before they are looked up in the alphabet.
// currentNode     - node to fill in; child nodes are created here as needed.
//
// Alphabets with fewer than two symbols leave currentNode untouched.
public void buildWaveletTree(ArrayList currentAlphabet, String currentLabel, WaveletNode currentNode)
{
    if (currentAlphabet.Count > 2)
    {
        StringBuilder bitmapBuilder = new StringBuilder();
        StringBuilder leftLabel = new StringBuilder();
        StringBuilder rightLabel = new StringBuilder();
        int mid = (currentAlphabet.Count + 1) / 2;

        // Creation of the node bitmap: the first `mid` symbols become 0 and go to the left
        // child, all other symbols become 1 and go to the right child.
        foreach (char c in currentLabel)
        {
            char symbol = Char.ToUpper(c);
            if (getIndex(symbol, currentAlphabet) < mid)
            {
                bitmapBuilder.Append("0");
                leftLabel.Append(symbol);
            }
            else
            {
                bitmapBuilder.Append("1");
                rightLabel.Append(symbol);
            }
        }

        currentNode.setBitmap(bitmapBuilder.ToString());
        encodeNodeBitmapAsRRR(currentNode, bitmapBuilder);

        currentNode.setLeftChild(new WaveletNode());
        currentNode.getLeftChild().setParent(currentNode);
        buildWaveletTree(currentAlphabet.GetRange(0, mid), leftLabel.ToString(), currentNode.getLeftChild());

        // Only when the current alphabet has more than 3 characters does the right half hold
        // at least two symbols and therefore need a node of its own. With exactly 3 characters
        // the single right-hand symbol is fully described by the 1-bits of this node.
        if (currentAlphabet.Count > 3)
        {
            currentNode.setRightChild(new WaveletNode());
            currentNode.getRightChild().setParent(currentNode);
            buildWaveletTree(currentAlphabet.GetRange(mid, currentAlphabet.Count - mid), rightLabel.ToString(), currentNode.getRightChild());
        }
    }
    else if (currentAlphabet.Count == 2)
    {
        // Two-symbol node: the first symbol is encoded as 0, everything else as 1.
        // No child nodes are created.
        StringBuilder bitmapBuilder = new StringBuilder();

        foreach (char c in currentLabel)
        {
            if (getIndex(Char.ToUpper(c), currentAlphabet) == 0)
            {
                bitmapBuilder.Append("0");
            }
            else
            {
                bitmapBuilder.Append("1");
            }
        }

        currentNode.setBitmap(bitmapBuilder.ToString());
        encodeNodeBitmapAsRRR(currentNode, bitmapBuilder);
    }
}

// Builds the RRR lookup table and the RRR data structure of a node whose plain bitmap has
// already been stored with setBitmap(). bitmapBuilder must hold that same bitmap; it is padded
// in place with trailing zeroes so that every block has the same size, and the padded version
// is stored as the node's help bitmap.
private void encodeNodeBitmapAsRRR(WaveletNode node, StringBuilder bitmapBuilder)
{
    int bitmapLength = node.getBitmap().Length;

    // The logarithm is only taken for lengths of at least 2; shorter bitmaps use 0 instead.
    double log2Length = bitmapLength > 1 ? Math.Log(bitmapLength, 2) : 0;

    // Both sizes are used as divisors below, so they must never be 0. Short bitmaps (fewer than
    // 4 bits for the block size, fewer than 2 bits for the superblock size) would otherwise
    // cause a DivideByZeroException.
    int blockSize = Math.Max(1, (int)(log2Length / 2));
    int superblockSize = Math.Max(blockSize, (int)(blockSize * Math.Floor(log2Length)));

    // Creation of the RRR lookup table.
    node.RRRTable.BlockSize = blockSize;
    node.RRRTable.SuperblockSize = superblockSize;
    node.RRRTable.ClassBitsNeeded = (int)Math.Floor(Math.Log(blockSize, 2)) + 1;
    node.RRRTable.buildTableG();

    // Fill the bitmap with additional zeroes so that every block is equal in size.
    while (bitmapBuilder.Length % blockSize != 0)
    {
        bitmapBuilder.Append("0");
    }

    // The padded bitmap is the one the RRR data structure is created from.
    node.setHelpBitmap(bitmapBuilder.ToString());
    string paddedBitmap = node.getHelpBitmap();

    // Creation of the RRR data structure: one (class, offset) pair per block.
    StringBuilder rrrBitmap = new StringBuilder();
    int totalPopcount = 0;
    for (int i = 0; i < paddedBitmap.Length; i += blockSize)
    {
        string block = paddedBitmap.Substring(i, blockSize);
        int popCount = popcount(block);

        // Store the class (the popcount of the block) using the required number of bits.
        rrrBitmap.Append(ToBin(popCount, node.RRRTable.ClassBitsNeeded));

        // Store the offset: the index of this block inside the table G entry for its class,
        // using ceil(log2(GetBinCoeff(blockSize, popCount))) bits.
        var offset = node.RRRTable.TableG.FirstOrDefault(t => t.Key == popCount).Value.IndexOf(block);
        int offsetBits = (int)Math.Ceiling(Math.Log(GetBinCoeff(blockSize, popCount), 2));
        rrrBitmap.Append(ToBin(offset, offsetBits));

        totalPopcount += popCount;

        // At the end of every superblock, and after the very last block, remember the running
        // popcount and the current length of the encoded bitmap.
        int blockEnd = i + blockSize;
        if (blockEnd % superblockSize == 0 || blockEnd >= paddedBitmap.Length)
        {
            node.RRRStruct.superblockSums.Add(totalPopcount);
            node.RRRStruct.superblockOffsets.Add(rrrBitmap.Length);
        }
    }

    node.RRRStruct.Bitmap = rrrBitmap.ToString();
}