private static void FlushLowRepMersInPartition(object threadParams)
        {
            // Thread entry point: culls the low-rep mers from one partition of the repeated-mer tables.
            // threadParams must be a FlushingThreadParams naming the tables and the partition to reduce.
            FlushingThreadParams args = (FlushingThreadParams)threadParams;
            MerTables tables    = args.merTable;
            int       partition = args.partitionNo;

            // Reduce returns the number of entries it culled from this partition
            bool nothingCulled = tables.repeatedMers[partition].Reduce(minKeepDepth, tables) <= 0;
            if (nothingCulled)
            {
                return;
            }

            // at least one entry was removed, so this partition can no longer be full
            tables.repeatedMersFull[partition] = false;
        }
Ejemplo n.º 2
0
        // An in-place reduction of a MerDictionary. Entries whose summed depth is greater than minReps are kept and
        // re-added from the beginning of the table; every other entry (summed depth <= minReps) is copied to the shared
        // culled buffer held by merTable. Assumes that the entries are inserted sequentially and that there are no
        // empty slots. When the shared culled buffer reaches its limit it is appended to merTable.filledCulledBuffers
        // and replaced by a freshly allocated buffer of the same size.
        //
        // minReps:  keep threshold - an entry survives only if (upper 32 bits + lower 32 bits) of its value is > minReps
        // merTable: supplies the shared culledBuffer, culledLock and filledCulledBuffers that receive the culled entries
        // returns:  the number of entries culled from this dictionary
        //
        public int Reduce(long minReps, MerTables merTable)
        {
            int culledCount = 0;
            // local reference to the shared buffer; refreshed below if the buffer gets swapped out
            LowRepMerBuffer culledBuffer = merTable.culledBuffer;

            // clear out all the buckets (presumably -1 marks an empty bucket - confirm against Add)
            for (int i = 0; i < buckets.Length; i++)
                buckets[i] = -1; 

            // and either re-add or cull each entry. The count is reset to zero first so that the kept entries
            // are re-added from the start of the table.
            int originalCount = count;
            count = 0;
            for (int i = 0; i < originalCount; i++)
            {
                //ulong key = this.entries[i].key;
                //long value = this.entries[i].value;
                ulong key = this.keys[i];
                long value = this.values[i];      
                // the value is treated as two 32-bit halves that are added together (presumably two packed counts - confirm)
                // NOTE(review): >> on a long is an arithmetic shift, so a value with its top bit set yields a negative upper half
                long summedValue = (value >> 32) + (value & 0xffffffff);

                // strict comparison: an entry whose summed depth equals minReps is culled, not kept
                if (summedValue > minReps)
                    // keeping this entry as it has enough counts
                    Add(key, value);                            // must always be room in the table - worst case is rewriting all entries in-place
                else
                {
                    // flush this entry to the (shared) culled buffer
                    // claim the next slot; the increment is atomic so concurrent callers receive distinct indexes
                    int ci = Interlocked.Increment(ref culledBuffer.idx) - 1;

                    // the claimed slot is at or beyond the limit, so this buffer is treated as full
                    if (ci >= culledBuffer.limit)
                    {
                        lock (merTable.culledLock)
                        {
                            // re-test now that we have the lock (some other thread could have flushed the array while we were waiting for the lock)
                            if (culledBuffer.bufferActive)
                            {
                                // this buffer is being saved so it's no longer active
                                culledBuffer.bufferActive = false;
                                // add the full buffer to the list of filled buffers (the list is created on first use)
                                if (merTable.filledCulledBuffers == null)
                                    merTable.filledCulledBuffers = new List<LowRepMerBuffer>();
                                merTable.filledCulledBuffers.Add(merTable.culledBuffer);

                                // prepare a new culledBuffer with the same array lengths and limit as the one just retired
                                LowRepMerBuffer newCulledBuffer = new LowRepMerBuffer();
                                newCulledBuffer.keys = new ulong[merTable.culledBuffer.keys.Length];
                                newCulledBuffer.values = new long[merTable.culledBuffer.values.Length];
                                newCulledBuffer.idx = 0;
                                newCulledBuffer.limit = culledBuffer.limit;
                                newCulledBuffer.bufferActive = true;
                                newCulledBuffer.bufferNo = culledBuffer.bufferNo + 1;

                                // and (atomically) make it available to any concurrent threads
                                merTable.culledBuffer = newCulledBuffer;
                            }
                        }

                        // remember to use the new buffer for this thread    
                        culledBuffer = merTable.culledBuffer;   
                        // and get a new index for this new buffer
                        // NOTE(review): this second index is not checked against limit; it appears to rely on the arrays
                        // being allocated longer than limit by whoever created the first buffer - confirm
                        ci = Interlocked.Increment(ref culledBuffer.idx) - 1;
                    }

                    // store the culled entry in the slot claimed above
                    culledBuffer.keys[ci] = key;
                    culledBuffer.values[ci] = value;
                    culledCount++;
                }
            } // for each entry in the original entries table

            return culledCount;
        }
        // Culls the low-rep mers from every partition of the repeat tables (one thread per partition), hands the culled mers
        // to WriteLowRepMers, and then folds the per-thread overflow dictionaries back into the partitioned tables.
        // Can only be called after all the threads have finished for a seq data file. This code is *not* thread-safe.
        //
        // merTable: the tables handed to each partition-flushing thread
        // fileNo:   passed through to WriteLowRepMers for every buffer that is written
        //
        public void FlushLowRepMers(MerTables merTable, int fileNo)
        {
            // the shared buffer for culled mers is sized from the first partition, with one spare slot per partition beyond the limit
            int bufferLimit  = this.repeatedMers[0].Capacity;
            int bufferLength = bufferLimit + noOfPartitions;

            LowRepMerBuffer firstBuffer = new LowRepMerBuffer();
            firstBuffer.keys         = new ulong[bufferLength];
            firstBuffer.values       = new long[bufferLength];
            firstBuffer.idx          = 0;
            firstBuffer.limit        = bufferLimit;
            firstBuffer.bufferNo     = 1;
            firstBuffer.bufferActive = true;

            culledBuffer = firstBuffer;
            culledLock   = new object();

            // start one below-normal-priority worker per partition...
            Thread[] workers = new Thread[noOfPartitions];
            for (int partition = 0; partition < noOfPartitions; partition++)
            {
                FlushingThreadParams workerArgs = new FlushingThreadParams();
                workerArgs.merTable    = merTable;
                workerArgs.partitionNo = partition;

                Thread worker = new Thread(new ParameterizedThreadStart(MerTables.FlushLowRepMersInPartition));
                worker.Priority = ThreadPriority.BelowNormal;
                worker.Start(workerArgs);

                workers[partition] = worker;
            }

            // ...and wait until every one of them has finished
            foreach (Thread worker in workers)
            {
                worker.Join();
            }

            // write out the buffers that were filled while culling, releasing each one as soon as it has been written
            // NOTE(review): filled buffers are written with their full array length rather than their limit - confirm this is intended
            int bufferNo = 0;
            if (filledCulledBuffers != null)
            {
                while (bufferNo < filledCulledBuffers.Count)
                {
                    LowRepMerBuffer filled = filledCulledBuffers[bufferNo];
                    WriteLowRepMers(fileNo, bufferNo, filled, filled.keys.Length);
                    filledCulledBuffers[bufferNo] = null;
                    bufferNo++;
                }

                filledCulledBuffers = null;
            }

            // the current buffer is only partly used, so just its first idx entries are written
            WriteLowRepMers(fileNo, bufferNo, culledBuffer, culledBuffer.idx);

            // drop the reference to the temporary buffer
            culledBuffer = null;

            // finally redistribute each per-thread overflow dictionary: a mer goes to its partition's shared table
            // unless that partition is flagged as full, in which case it is carried over into a replacement overflow dictionary
            for (int t = 0; t < overflowMers.Length; t++)
            {
                MerDictionary oldOverflow = overflowMers[t];
                if (oldOverflow == null)
                {
                    continue;
                }

                MerDictionary carriedOver = new MerDictionary(oldOverflow.Capacity, fullMerMask);

                foreach (KeyValuePair<ulong, long> entry in oldOverflow)
                {
                    ulong mer       = entry.Key;
                    long  merValue  = entry.Value;
                    int   partition = (mer.GetHashCode() & int31Mask) % noOfPartitions;

                    if (!repeatedMersFull[partition])
                    {
                        // a false return from Add flags the partition as full
                        // NOTE(review): whether the entry was still stored when Add returns false is not visible here - confirm
                        if (!repeatedMers[partition].Add(mer, merValue))
                        {
                            repeatedMersFull[partition] = true;
                        }
                    }
                    else
                    {
                        carriedOver.Add(mer, merValue);
                    }
                }

                overflowMers[t] = carriedOver;
            }
        }