// Thread entry point that culls the low-rep mers from a single partition of the repeated-mers tables.
// 'threadParams' must be a FlushingThreadParams naming the MerTables instance and the partition to reduce.
private static void FlushLowRepMersInPartition(object threadParams)
{
    FlushingThreadParams request = (FlushingThreadParams)threadParams;
    MerTables tables = request.merTable;
    int partition = request.partitionNo;

    // cull every entry in this partition that does not have enough depth to be kept
    int removed = tables.repeatedMers[partition].Reduce(minKeepDepth, tables);

    // nothing was culled, so the 'full' state of the partition is left as it was
    if (removed <= 0)
    {
        return;
    }

    // the partition can't possibly be full if some entries have just been culled from it
    tables.repeatedMersFull[partition] = false;
}
// An in-place reduction of a MerDictionary. The depth of an entry is the sum of the high and low 32-bit halves of
// its value. Entries with a depth greater than minReps are kept and re-added towards the beginning of the table;
// all other entries (depth <= minReps) are copied to the shared culled buffer held by merTable.
// Assumes that the entries were inserted sequentially and that there are no empty slots.
//
// Slots in the shared culled buffer are claimed with Interlocked.Increment. When a claimed slot is at or beyond the
// buffer's limit, the buffer is moved to merTable.filledCulledBuffers (under merTable.culledLock), replaced by a new
// buffer of the same size, and the claim is retried. The retry is repeated until a slot below the limit is obtained,
// as the replacement buffer can itself have been filled by other threads before this thread gets its slot.
//
// minReps:  entries are kept only if their summed depth is greater than this value
// merTable: owner of the shared culledBuffer, culledLock and filledCulledBuffers
// returns:  the number of entries that were culled from this dictionary
//
public int Reduce(long minReps, MerTables merTable)
{
    int culledCount = 0;
    LowRepMerBuffer culledBuffer = merTable.culledBuffer;

    // clear out all the buckets
    for (int i = 0; i < buckets.Length; i++)
        buckets[i] = -1;

    // and either re-add or cull each entry
    int originalCount = count;
    count = 0;

    for (int i = 0; i < originalCount; i++)
    {
        ulong key = this.keys[i];
        long value = this.values[i];
        long summedValue = (value >> 32) + (value & 0xffffffff);

        if (summedValue > minReps)
        {
            // keeping this entry as it has enough counts
            // (must always be room in the table - worst case is rewriting all entries in-place)
            Add(key, value);
            continue;
        }

        // flush this entry to the (shared) culled buffer
        int ci = Interlocked.Increment(ref culledBuffer.idx) - 1;

        // keep going until we own a slot that is inside the buffer's limit
        while (ci >= culledBuffer.limit)
        {
            // a buffer that can hold nothing could never satisfy the loop condition
            if (culledBuffer.limit <= 0)
                throw new System.InvalidOperationException("culled buffer limit must be greater than zero");

            lock (merTable.culledLock)
            {
                // re-test now that we have the lock (some other thread could have replaced the buffer while we were waiting)
                if (culledBuffer.bufferActive)
                {
                    // this buffer is being saved so it's no longer active
                    culledBuffer.bufferActive = false;

                    // add the full buffer to the list of filled buffers
                    if (merTable.filledCulledBuffers == null)
                        merTable.filledCulledBuffers = new List<LowRepMerBuffer>();
                    merTable.filledCulledBuffers.Add(merTable.culledBuffer);

                    // prepare a new culledBuffer
                    LowRepMerBuffer newCulledBuffer = new LowRepMerBuffer();
                    newCulledBuffer.keys = new ulong[merTable.culledBuffer.keys.Length];
                    newCulledBuffer.values = new long[merTable.culledBuffer.values.Length];
                    newCulledBuffer.idx = 0;
                    newCulledBuffer.limit = culledBuffer.limit;
                    newCulledBuffer.bufferActive = true;
                    newCulledBuffer.bufferNo = culledBuffer.bufferNo + 1;

                    // and make it available to any concurrent threads
                    merTable.culledBuffer = newCulledBuffer;
                }

                // remember to use the current buffer for this thread (read while we still hold the lock)
                culledBuffer = merTable.culledBuffer;
            }

            // and get a new index for this buffer - re-checked by the loop condition
            ci = Interlocked.Increment(ref culledBuffer.idx) - 1;
        }

        culledBuffer.keys[ci] = key;
        culledBuffer.values[ci] = value;
        culledCount++;
    } // for each entry in the original entries table

    return culledCount;
}
// Flushes the low-rep mers from the repeat tables and folds in the per-thread repeats. The work is done in three steps:
//   1. every partition is reduced on its own (below-normal priority) thread, culling entries into a shared buffer
//   2. the filled culled buffers, and then whatever is left in the current one, are written out via WriteLowRepMers
//   3. each per-thread overflow dictionary is pushed into the shared partitions; entries aimed at a partition that is
//      marked as full go into a replacement overflow dictionary instead
// Can only be called after all the threads have finished for a seq data file. This code is *not* thread-safe.
public void FlushLowRepMers(MerTables merTable, int fileNo)
{
    // the culled buffer is sized from the capacity of the first partition, with one extra slot per partition
    // NOTE(review): the buffer and lock are created on this instance but the worker threads reach them through
    // 'merTable' - presumably merTable is always this instance; confirm against the callers
    int slotsPerBuffer = this.repeatedMers[0].Capacity;
    int allocatedSlots = slotsPerBuffer + noOfPartitions;
    culledBuffer = new LowRepMerBuffer
    {
        keys = new ulong[allocatedSlots],
        values = new long[allocatedSlots],
        idx = 0,
        bufferActive = true,
        bufferNo = 1,
        limit = slotsPerBuffer
    };
    culledLock = new object();

    // start one reducing thread for each partition...
    Thread[] workers = new Thread[noOfPartitions];
    for (int partition = 0; partition < noOfPartitions; partition++)
    {
        FlushingThreadParams request = new FlushingThreadParams
        {
            merTable = merTable,
            partitionNo = partition
        };

        Thread worker = new Thread(new ParameterizedThreadStart(MerTables.FlushLowRepMersInPartition));
        worker.Priority = ThreadPriority.BelowNormal;
        workers[partition] = worker;
        worker.Start(request);
    }

    // ... and wait for all of them to finish
    for (int partition = 0; partition < noOfPartitions; partition++)
    {
        workers[partition].Join();
        workers[partition] = null;
    }

    // write out any filled culled buffers (bufferNo numbers the buffers written for this file, starting at 0)
    int bufferNo = 0;
    if (filledCulledBuffers != null)
    {
        for (; bufferNo < filledCulledBuffers.Count; bufferNo++)
        {
            LowRepMerBuffer filled = filledCulledBuffers[bufferNo];
            WriteLowRepMers(fileNo, bufferNo, filled, filled.keys.Length);
            filledCulledBuffers[bufferNo] = null;
        }

        filledCulledBuffers = null;
    }

    // then write out the remaining culled low-rep mers and release the buffer
    WriteLowRepMers(fileNo, bufferNo, culledBuffer, culledBuffer.idx);
    culledBuffer = null;

    // finally push the per-thread dictionaries to the shared dictionary
    for (int t = 0; t < overflowMers.Length; t++)
    {
        MerDictionary currentOverflow = overflowMers[t];
        if (currentOverflow == null)
        {
            continue;
        }

        MerDictionary replacementOverflow = new MerDictionary(currentOverflow.Capacity, fullMerMask);

        foreach (KeyValuePair<ulong, long> kvp in currentOverflow)
        {
            int partitionNo = (kvp.Key.GetHashCode() & int31Mask) % noOfPartitions;

            if (!repeatedMersFull[partitionNo])
            {
                // NOTE(review): when Add reports failure the partition is flagged as full, but this entry is not
                // copied to the replacement overflow - confirm what a false return from Add means
                if (!repeatedMers[partitionNo].Add(kvp.Key, kvp.Value))
                {
                    repeatedMersFull[partitionNo] = true;
                }
            }
            else
            {
                // the shared partition is full so the entry stays in the per-thread overflow
                replacementOverflow.Add(kvp.Key, kvp.Value);
            }
        }

        overflowMers[t] = replacementOverflow;
    }
}