// Sorts the first noMers entries of a culled low-rep mer buffer by key and writes them to a binary
// flush file named <tempDirectory><processID>_<fileNo>_<bufferNo>.bfm. The file starts with the entry
// count (int), followed by noMers (key, value) pairs. Once the file has been written, its name and the
// lowest and highest keys it contains are appended to flushedLowRepsFNs, firstFlushedLowRepsMer and
// lastFlushedLowRepsMer.
//
//   fileNo       - second component of the flush file name
//   bufferNo     - third component of the flush file name
//   culledBuffer - buffer whose keys/values arrays hold the entries to flush (sorted in place)
//   noMers       - number of leading entries of the buffer to sort and write
//
// An empty buffer (noMers == 0) is skipped entirely: no file is created and nothing is recorded,
// because there is no lowest/highest mer to remember for it.
private void WriteLowRepMers(int fileNo, int bufferNo, LowRepMerBuffer culledBuffer, int noMers)
{
    // nothing to flush - indexing keys[noMers - 1] below would otherwise fail
    if (noMers == 0)
    {
        return;
    }

    ulong[] culledBufferKeys = culledBuffer.keys;
    long[] culledBufferValues = culledBuffer.values;

    // sort the in-use part of the buffer by key, keeping each value alongside its key
    Array.Sort<ulong, long>(culledBufferKeys, culledBufferValues, 0, noMers);

    // the process ID goes at the beginning of the file name
    int processID;
    using (Process currentProcess = Process.GetCurrentProcess())
    {
        processID = currentProcess.Id;
    }
    string binaryfileName = tempDirectory + processID + "_" + fileNo + "_" + bufferNo + ".bfm";

    // 'using' guarantees the file handle is released even if one of the writes throws
    using (BinaryWriter flushWriter = new BinaryWriter(File.Open(binaryfileName, FileMode.Create, FileAccess.Write)))
    {
        // the number of k-mers in the flush file comes first
        flushWriter.Write(noMers);

        // followed by the in-use (key, value) pairs in sorted order
        for (int i = 0; i < noMers; i++)
        {
            flushWriter.Write(culledBufferKeys[i]);
            flushWriter.Write(culledBufferValues[i]);
        }
    }

    // only record the file once it has been completely written
    flushedLowRepsFNs.Add(binaryfileName);                  // name of the flush file for the partition
    firstFlushedLowRepsMer.Add(culledBufferKeys[0]);        // lowest mer in this flush file
    lastFlushedLowRepsMer.Add(culledBufferKeys[noMers - 1]); // highest mer in this flush file
}
// Compacts this table in place. Every entry whose summed depth (upper 32 bits of the value plus lower
// 32 bits of the value) is strictly greater than minReps is re-added at the front of the table; every
// other entry is appended to the shared culled buffer held by merTable. Assumes the entries were
// inserted sequentially and that there are no empty slots. When the shared buffer reaches its limit it
// is moved to merTable.filledCulledBuffers and replaced with a fresh buffer of the same size.
//
//   minReps  - entries are kept only if their summed depth exceeds this value
//   merTable - owner of the shared culled buffer, its lock and the list of filled buffers
//
// Returns the number of entries that were culled from this table.
public int Reduce(long minReps, MerTables merTable)
{
    LowRepMerBuffer target = merTable.culledBuffer;
    int noCulled = 0;

    // empty every bucket; the surviving entries are re-inserted below
    for (int b = 0; b < buckets.Length; b++)
    {
        buckets[b] = -1;
    }

    // restart the table at zero entries and walk over what used to be there
    int entriesToScan = count;
    count = 0;

    for (int e = 0; e < entriesToScan; e++)
    {
        ulong mer = this.keys[e];
        long packedCounts = this.values[e];
        long highCount = packedCounts >> 32;
        long lowCount = packedCounts & 0xffffffff;

        if (highCount + lowCount > minReps)
        {
            // enough counts to keep - there is always room as, at worst, every entry is rewritten in place
            Add(mer, packedCounts);
            continue;
        }

        // not enough counts - claim the next slot in the shared culled buffer
        int slot = Interlocked.Increment(ref target.idx) - 1;
        if (slot >= target.limit)
        {
            lock (merTable.culledLock)
            {
                // check again under the lock - another thread may already have retired this buffer
                if (target.bufferActive)
                {
                    // the buffer is being retired, so it stops being the active one
                    target.bufferActive = false;

                    // park the full buffer on the list of filled buffers
                    if (merTable.filledCulledBuffers == null)
                    {
                        merTable.filledCulledBuffers = new List<LowRepMerBuffer>();
                    }
                    merTable.filledCulledBuffers.Add(merTable.culledBuffer);

                    // build an empty successor with the same array sizes and limit
                    LowRepMerBuffer replacement = new LowRepMerBuffer();
                    replacement.keys = new ulong[merTable.culledBuffer.keys.Length];
                    replacement.values = new long[merTable.culledBuffer.values.Length];
                    replacement.idx = 0;
                    replacement.limit = target.limit;
                    replacement.bufferActive = true;
                    replacement.bufferNo = target.bufferNo + 1;

                    // publish the successor for every other thread to pick up
                    merTable.culledBuffer = replacement;
                }
            }

            // switch this thread over to the current shared buffer and claim a slot in it
            target = merTable.culledBuffer;
            slot = Interlocked.Increment(ref target.idx) - 1;
        }

        target.keys[slot] = mer;
        target.values[slot] = packedCounts;
        noCulled++;
    }

    return noCulled;
}
// Flushes the low-rep mers from the repeat tables, condenses the remaining repeated mers and folds in
// the per-thread overflow dictionaries. Can only be called after all the threads have finished for a
// seq data file. This code is *not* thread-safe.
//
//   merTable - table handed to each flushing thread through its FlushingThreadParams
//   fileNo   - number passed to WriteLowRepMers for naming the flush files
//
// Steps: allocate the shared culled buffer, run MerTables.FlushLowRepMersInPartition on one thread per
// partition, write out every filled culled buffer plus whatever is left in the current one, and finally
// move the per-thread overflow entries into the shared partitions that still have room.
public void FlushLowRepMers(MerTables merTable, int fileNo)
{
    // allocate a buffer to hold the flushed low-rep mers. The arrays are noOfPartitions longer than
    // 'limit'; Reduce does not re-check the limit after switching buffers, so the extra slots appear
    // to be headroom for that case. NOTE(review): confirm.
    int initialBufferLength = this.repeatedMers[0].Capacity;
    culledBuffer = new LowRepMerBuffer();
    culledBuffer.keys = new ulong[initialBufferLength + noOfPartitions];
    culledBuffer.values = new long[initialBufferLength + noOfPartitions];
    culledBuffer.idx = 0;
    culledBuffer.bufferActive = true;
    culledBuffer.bufferNo = 1;
    culledBuffer.limit = initialBufferLength;
    culledLock = new object();

    // start one below-normal priority flushing thread per partition
    FlushingThreadParams[] flushingParams = new FlushingThreadParams[noOfPartitions];
    Thread[] flushingThreads = new Thread[noOfPartitions];

    for (int p = 0; p < noOfPartitions; p++)
    {
        flushingParams[p] = new FlushingThreadParams();
        flushingParams[p].merTable = merTable;
        flushingParams[p].partitionNo = p;
        flushingThreads[p] = new Thread(new ParameterizedThreadStart(MerTables.FlushLowRepMersInPartition));
        flushingThreads[p].Priority = ThreadPriority.BelowNormal;
        flushingThreads[p].Start(flushingParams[p]);
    }

    // wait for all of them to finish before touching the culled buffers
    for (int p = 0; p < noOfPartitions; p++)
    {
        flushingThreads[p].Join();
        flushingThreads[p] = null;
    }

    // write out any filled culled buffers
    // NOTE(review): the full array length (limit + noOfPartitions) is written for these buffers,
    // which includes any padding slots that were never filled - confirm this is intended.
    int bufferNo = 0;
    if (filledCulledBuffers != null)
    {
        for (int i = 0; i < filledCulledBuffers.Count; i++)
        {
            WriteLowRepMers(fileNo, bufferNo, filledCulledBuffers[i], filledCulledBuffers[i].keys.Length);
            bufferNo++;
            filledCulledBuffers[i] = null;
        }
        filledCulledBuffers = null;
    }

    // finally write out the remaining culled low-rep mers. An empty buffer is skipped because
    // WriteLowRepMers indexes keys[noMers - 1], which is out of range when nothing was culled.
    if (culledBuffer.idx > 0)
    {
        WriteLowRepMers(fileNo, bufferNo, culledBuffer, culledBuffer.idx);
    }

    // return the temporary buffers
    culledBuffer = null;

    // finally push the per-thread dictionaries to the shared dictionary
    for (int t = 0; t < overflowMers.Length; t++)
    {
        if (overflowMers[t] == null)
        {
            continue;
        }

        MerDictionary currentOverflow = overflowMers[t];
        MerDictionary replacementOverflow = new MerDictionary(currentOverflow.Capacity, fullMerMask);

        foreach (KeyValuePair<ulong, long> kvp in currentOverflow)
        {
            // pick the partition from the non-negative hash of the mer
            int absMerHashCode = kvp.Key.GetHashCode() & int31Mask;
            int partitionNo = absMerHashCode % noOfPartitions;

            if (repeatedMersFull[partitionNo])
            {
                // the shared partition is already full, so the entry stays in the per-thread overflow
                replacementOverflow.Add(kvp.Key, kvp.Value);
            }
            else
            {
                // NOTE(review): when Add returns false the entry is not re-added to the replacement
                // overflow - confirm that Add still stored it in the partition.
                bool OK = repeatedMers[partitionNo].Add(kvp.Key, kvp.Value);
                if (!OK)
                {
                    repeatedMersFull[partitionNo] = true;
                }
            }
        }

        overflowMers[t] = replacementOverflow;
    }
}