// Builds a dictionary intended to hold about dictionarySize k-mers of length merSize.
//
// When dictionarySize is no larger than maxTable a single kMerDictionary is used.
// Otherwise the dictionary is split into 4^keySize partitions, selected by the
// top keySize bases (2 bits per base) of a 64-bit packed k-mer, and each partition
// gets its own kMerDictionary.
//
// Partition capacities are deliberately uneven: partition i is given
// 2 * (noOfPartitions - i) / noOfPartitions times the average partition length,
// i.e. twice the average for partition 0, shrinking linearly for later partitions
// (the original author's note: "scaled to reflect canonical distributions").
//
// dictionarySize: requested total capacity; also stored in 'capacity'.
// merSize:        k-mer length, passed through to every kMerDictionary.
public MerDictionary(long dictionarySize, int merSize)
{
    if (dictionarySize > maxTable)
    {
        partitioned = true;

        // Find the minimum key size (in bases) for the partition key.
        // NOTE(review): if maxTable is an integral type this quotient is truncated
        // before the logarithm is taken, so ratios just above a power of 4 yield one
        // base fewer than a real-valued division would - confirm this is intended.
        keySize = (int)Math.Ceiling(Math.Log(dictionarySize / maxTable, 4));
        if (keySize < 1)
        {
            keySize = 1;                                // must partition on at least one base
        }

        noOfPartitions = (int)Math.Pow(4, keySize);     // giving this many partitions
        keyBaseShift = 64 - keySize * 2;                // shift right this many bits to extract the partition no.
        int partitionLength = (int)(dictionarySize / noOfPartitions);   // average partition length

        dictionaryPartitions = new kMerDictionary<TV>[noOfPartitions];
        for (int i = 0; i < noOfPartitions; i++)
        {
            // Do the scaling in 64-bit arithmetic: for i == 0 the intermediate product is
            // roughly 2 * dictionarySize, which wrapped to a negative value in 32-bit
            // arithmetic once dictionarySize exceeded about 1.07 billion.
            long scaledLength = 2L * partitionLength * (noOfPartitions - i) / noOfPartitions;
            int scaledPartitionLength = (int)Math.Min(scaledLength, int.MaxValue);

            dictionaryPartitions[i] = new kMerDictionary<TV>(scaledPartitionLength, merSize);
        }
    }
    else
    {
        partitioned = false;
        dictionary = new kMerDictionary<TV>((int)dictionarySize, merSize);
    }

    capacity = dictionarySize;
}
// Builds the hash-table partition 'partitionNo' from the first merCount entries of
// the parallel arrays orderedMers / orderedValues.
//
// A fresh kMerDictionary sized for merCount entries is created and stored in
// pmers[partitionNo] before it is filled; every (mer, value) pair is then added
// with AddNoCheck and the finished table is passed through Optimise().
private void CopyToTable(int partitionNo, ulong[] orderedMers, TV[] orderedValues, int merCount, int merSize)
{
    kMerDictionary<TV> partitionTable = new kMerDictionary<TV>(merCount, merSize);
    pmers[partitionNo] = partitionTable;

    int next = 0;
    while (next < merCount)
    {
        ulong mer = orderedMers[next];
        TV value = orderedValues[next];
        partitionTable.AddNoCheck(mer, value);
        next++;
    }

    partitionTable.Optimise();
}
// Number of bits to shift a packed k-mer right to isolate its partition-key bases.
int keyBaseShift = 0;

// Creates a table intended to hold about tableSize k-mers of length merSize.
//
// Tables no larger than maxTable use a single kMerDictionary ('mers'). Larger tables
// are partitioned on the leading bases of the packed k-mer: this constructor works out
// the number of partitions, allocates two staging buffers (mers and values) used while
// loading, creates the CopyToTable delegate, and records the highest k-mer value that
// belongs to each partition in partitionBoundaries. The per-partition dictionaries
// themselves ('pmers') are only allocated as an empty array here.
public MerTable(long tableSize, int merSize)
{
    this.merSize = merSize;

    if (tableSize <= maxTable)
    {
        // Small enough for one unpartitioned dictionary.
        mers = new kMerDictionary<TV>((int)tableSize, merSize);
        return;
    }

    partitioned = true;

    // Minimum number of key bases, never fewer than one.
    // NOTE(review): tableSize / maxTable looks like integer division, so the ratio is
    // truncated before the log is taken - confirm this is intended.
    int keySizeBases = Math.Max(1, (int)Math.Ceiling(Math.Log(tableSize / maxTable, 4)));
    int keyBits = keySizeBases * 2;                 // two bits per base

    keyBaseShift = 64 - keyBits;
    noParts = (int)Math.Pow(4, keySizeBases);       // one partition per possible key

    // Rough guess at the space needed for the first (largest) partition of loaded
    // k-mers: twice the average partition length.
    int averageLength = (int)(tableSize / noParts);
    int bufferLength = 2 * averageLength;

    // Two buffers of each kind, so one can be filled while the other is copied.
    orderedMers = new ulong[][] { new ulong[bufferLength], new ulong[bufferLength] };
    orderedValues = new TV[][] { new TV[bufferLength], new TV[bufferLength] };

    ctd = new CopyToTableDelegate(CopyToTable);
    pmers = new kMerDictionary<TV>[noParts];

    // Upper boundary of partition p: key bits set to p, all remaining low bits set to 1.
    ulong lowBitsMask = ulong.MaxValue >> keyBits;
    partitionBoundaries = new ulong[noParts];
    for (int p = 0; p < noParts; p++)
    {
        partitionBoundaries[p] = ((ulong)p << keyBaseShift) | lowBitsMask;
    }
}
// Completes a load into the table.
//
// Returns false (after writing a message to the console) when dataNeedsSorting is set;
// nothing else is touched in that case. Otherwise, for a partitioned table, it waits for
// any outstanding asynchronous buffer copy, copies the final staging buffer into its
// partition, and gives every partition that is still null a kMerDictionary constructed
// with size 1. The staging buffers are then released and true is returned.
public bool LoadFinished()
{
    if (dataNeedsSorting)
    {
        Console.WriteLine("kmers being loaded into table were not in sorted order");
        return false;
    }

    if (partitioned)
    {
        // The previous buffer copy may still be in flight - let it finish first.
        bool copyInFlight = iarCopyToTable != null && !iarCopyToTable.IsCompleted;
        if (copyInFlight)
        {
            iarCopyToTable.AsyncWaitHandle.WaitOne();
        }

        // Copy the final buffer into its hash-table partition.
        ulong[] lastMers = orderedMers[currentBuffer];
        TV[] lastValues = orderedValues[currentBuffer];
        CopyToTable(currentPartition, lastMers, lastValues, cpi, merSize);

        // Partitions that were never populated still need a dictionary object.
        for (int slot = 0; slot < pmers.Length; slot++)
        {
            if (pmers[slot] != null)
            {
                continue;
            }
            pmers[slot] = new kMerDictionary<TV>(1, merSize);
        }
    }

    // Finished with the staging buffers now.
    orderedMers = null;
    orderedValues = null;

    return true;
}