/// <summary>
/// Looks up a k-mer in the <c>uniqueMers</c> table and unpacks its pair of 32-bit counts.
/// </summary>
/// <remarks>
/// The table is probed with the canonical form of the k-mer, i.e. whichever of the k-mer and its
/// reverse complement is numerically smaller. The stored value packs two counts into one ulong
/// (high 32 bits and low 32 bits). When the as-given k-mer was already canonical the high half is
/// returned as the plus count and the low half as the RC count; when the reverse complement was
/// the canonical form the two halves are swapped.
/// </remarks>
/// <param name="mer">Packed k-mer, in the orientation the caller is asking about.</param>
/// <param name="plusCount">Count for the k-mer in the orientation given (0 if not in the table).</param>
/// <param name="rcCount">Count for the opposite orientation (0 if not in the table).</param>
/// <returns><c>true</c> if the canonical k-mer was present in the table; otherwise <c>false</c>.</returns>
private static bool FindMerInUniqueMers(ulong mer, out int plusCount, out int rcCount)
{
    // pick the canonical key: the smaller of the k-mer and its reverse complement
    ulong reverseComplement = MerStrings.ReverseComplement(mer);
    bool lookedUpAsRC = reverseComplement < mer;
    ulong canonicalMer = lookedUpAsRC ? reverseComplement : mer;

    ulong packedCounts;
    bool present = uniqueMers.TryGetValue(canonicalMer, out packedCounts);
    if (!present)
    {
        // not in the table - report zero for both strands
        packedCounts = 0;
    }

    // split the packed value into its two 32-bit halves
    int highCount = (int)(packedCounts >> 32);
    int lowCount = (int)(packedCounts & 0xFFFFFFFF);

    // the halves swap roles when the lookup was done on the reverse complement
    plusCount = lookedUpAsRC ? lowCount : highCount;
    rcCount = lookedUpAsRC ? highCount : lowCount;

    return present;
}
/// <summary>
/// Records one occurrence of a k-mer, either by incrementing its existing count pair in a
/// repeated-mers table or by adding it to (or promoting it from) a singletons table.
/// </summary>
/// <remarks>
/// The k-mer is first reduced to its canonical form (the smaller of the k-mer and its reverse
/// complement). Counts are kept as a packed pair: the high 32 bits are incremented when the
/// as-read form was canonical, the low 32 bits when the reverse complement was canonical.
/// <para>
/// Flow: (1) try to update an existing repeated-mer entry; (2) otherwise try to insert the k-mer
/// into the singleton table for its prefix partition, flushing that table if it has reached its
/// capacity; (3) if the insert fails the k-mer is already a singleton, so it is promoted to a
/// repeats (or per-thread overflow) table and then incremented.
/// </para>
/// <para>
/// The singleton table's <c>activeCount</c> is incremented for the duration of the call and is
/// always decremented on exit, including when an exception is thrown, because a thread flushing
/// the table waits for that count to drop before it proceeds.
/// </para>
/// </remarks>
/// <param name="mer">Packed k-mer as read (not necessarily canonical).</param>
/// <param name="threadNo">Index of the calling worker thread; selects the per-thread overflow table and is passed to the repeat-table update helpers.</param>
public void AddOrIncrement(ulong mer, int threadNo)
{
    long addingIncrement = 0x0000000100000000;  // assume the as-read form is the canonical form (bump the high count)
    ulong rcFlagToBeSet = 0x0;                  // and that we don't want to set the RC flag

    // generate canonical k-mer first
    ulong rcMer = MerStrings.ReverseComplement(mer);
    if (rcMer < mer)
    {
        mer = rcMer;
        addingIncrement = 0x0000000000000001;   // increment the low part of the count pair
        rcFlagToBeSet = singletonRCFlagMask;    // remember that the canonical k-mer was the RC form
    }

    int absMerHashCode = mer.GetHashCode() & int31Mask;
    int partitionNo = absMerHashCode % noOfPartitions;
    int singletonPartitionNo = singletonPrefixBits == 0 ? 0 : (int)(mer >> (64 - singletonPrefixBits));

    // this mer may have been seen before, so first try updating it in one of the repeated mer tables
    bool updatedRepeat = UpdateRepeatedMer(partitionNo, mer, threadNo, mer, addingIncrement);
    if (updatedRepeat)
    {
        return;
    }

    // handling a k-mer for the first time - try adding it to the singletons table
    // ----------------------------------------------------------------------------

    // take a stable reference to the current singletons table (someone else may fill it and
    // swap in a replacement while we're still busy with it)
    MerCollection thisSingletonPartition = singletonFilters[singletonPartitionNo];
    Interlocked.Increment(ref thisSingletonPartition.activeCount);

    try
    {
        // try to add this mer to this partition's singletons collection
        // (and fetch the index of the existing entry if it's already there)
        int filterIdx;
        ulong fMer = mer | rcFlagToBeSet | singletonActiveFlagMask;
        bool added = thisSingletonPartition.TryInsertKey(fMer, out filterIdx);

        if (added)
        {
            // successfully added this mer so we must be seeing it for the first time

            // if the singleton table is already full enough, flush it out and replace it
            if (thisSingletonPartition.Count >= maxSingletonCapacity[singletonPartitionNo])
            {
                bool flushNeeded = true;
                int flushNumberToUse = 0;

                // lock this section to avoid two threads trying to flush/replace the same singleton buffer concurrently
                lock (lockSingletons)
                {
                    // re-test the entry condition now that we have the lock (the table may have been replaced while we were waiting)
                    if (!thisSingletonPartition.flushed)
                    {
                        // allocate a replacement table (25% larger, capped) for the other threads to use while we're flushing this one
                        int newSingletonLength = thisSingletonPartition.length + thisSingletonPartition.length / 4;
                        if (newSingletonLength > maxSingletonSize)
                        {
                            newSingletonLength = maxSingletonSize;
                        }

                        MerCollection emptySingletonFilter = new MerCollection(newSingletonLength, singletonMerMask);
                        singletonFilters[singletonPartitionNo] = emptySingletonFilter;         // make it visible to the concurrent threads (single point assignment)
                        maxSingletonCapacity[singletonPartitionNo] = newSingletonLength * 8 / 10;  // flush threshold is 80% of the new length
                        thisSingletonPartition.flushed = true;
                        flushNumberToUse = flushSingletonNumber[singletonPartitionNo];
                        flushSingletonNumber[singletonPartitionNo]++;
                    }
                    else
                    {
                        flushNeeded = false;
                    }
                }

                if (flushNeeded)
                {
                    // our own increment accounts for 1; anything above that is another in-flight update to this table
                    while (thisSingletonPartition.activeCount > 1)
                    {
                        // pause briefly to let any in-flight updates to this table complete
                        Thread.Sleep(100);
                    }
                    FlushSingletons(thisSingletonPartition, singletonPartitionNo, flushNumberToUse);
                }
            }
        }
        else
        {
            // Insert failed, so we must be seeing this k-mer for the second (or rarely more) time.
            // Mark it as inactive in singletons and add it to a repeats table with appropriate counts.
            // Two threads can race to promote the same singleton. This is resolved by a compare-and-swap
            // that clears the 'active' flag: only the thread whose swap succeeds does the promotion, and it
            // then sets the 'promoted' flag on the entry. The other threads wait until they see that flag.

            if (tracing)
            {
                lock (traceUpdates)
                {
                    traceUpdates.Enqueue(new TraceEntry(threadNo, 1, singletonPartitionNo, filterIdx, (ulong)thisSingletonPartition.entries[filterIdx].key));
                    if (traceUpdates.Count > maxTrace)
                    {
                        traceUpdates.Dequeue();
                    }
                }
            }

            // get the current value of this singleton entry (safe because the promotion changes are progressive)
            ulong merFromFilter = (ulong)thisSingletonPartition.entries[filterIdx].key;
            // and see if this singleton may have already been promoted
            bool activeSingleton = (merFromFilter & singletonActiveFlagMask) != 0;

            // if this singleton may be 'active', try to promote it
            if (activeSingleton)
            {
                // build what the inactive-but-being-promoted entry should look like
                ulong inactiveMer = mer & singletonMerMask;

                // if no-one else has altered the singleton entry, set it to inactive-but-being-promoted
                long currentMerFromFilter = Interlocked.CompareExchange(ref thisSingletonPartition.entries[filterIdx].key, (long)inactiveMer, (long)merFromFilter);

                if (tracing)
                {
                    lock (traceUpdates)
                    {
                        traceUpdates.Enqueue(new TraceEntry(threadNo, 2, singletonPartitionNo, filterIdx, (ulong)currentMerFromFilter));
                        if (traceUpdates.Count > maxTrace)
                        {
                            traceUpdates.Dequeue();
                        }
                    }
                }

                // if this thread successfully set the singleton to 'inactive', it takes care of the promotion
                if (currentMerFromFilter == (long)merFromFilter)
                {
                    ulong rcFlag = merFromFilter & singletonRCFlagMask;   // non-zero --> RC found in singletons
                    long initialCount = 0;
                    if (rcFlag != 0)
                    {
                        // singleton was seen in RC form
                        initialCount = 0x0000000000000001;
                    }
                    else
                    {
                        // singleton was seen in as-is form
                        initialCount = 0x0000000100000000;
                    }

                    if (repeatedMersFull[partitionNo])
                    {
                        // shared repeats partition is full - use (and lazily create) this thread's overflow table
                        if (overflowMers[threadNo] == null)
                        {
                            overflowMers[threadNo] = new MerDictionary(repeatedMers[partitionNo].lengthEntries / 10, fullMerMask);
                        }

                        bool full = overflowMers[threadNo].Add(mer, initialCount);
                        if (full)
                        {
                            overflowMers[threadNo].Resize();
                        }
                    }
                    else
                    {
                        bool full = repeatedMers[partitionNo].Add(mer, initialCount);
                        if (full)
                        {
                            repeatedMersFull[partitionNo] = true;
                        }
                    }

                    // now that the mer has been promoted, set the 'promoted' flag
                    inactiveMer = inactiveMer | (long)singletonPromotedFlagMask;
                    thisSingletonPartition.entries[filterIdx].key = (long)inactiveMer;

                    if (tracing)
                    {
                        lock (traceUpdates)
                        {
                            traceUpdates.Enqueue(new TraceEntry(threadNo, 3, singletonPartitionNo, filterIdx, (ulong)thisSingletonPartition.entries[filterIdx].key));
                            if (traceUpdates.Count > maxTrace)
                            {
                                traceUpdates.Dequeue();
                            }
                        }
                    }
                }
            }

            // singleton is now known to be no longer active, so wait (if necessary) for the
            // 'promoted' flag to be set and then increment the repeat counter
            merFromFilter = (ulong)thisSingletonPartition.entries[filterIdx].key;

            if (tracing)
            {
                lock (traceUpdates)
                {
                    traceUpdates.Enqueue(new TraceEntry(threadNo, 4, singletonPartitionNo, filterIdx, merFromFilter));
                    if (traceUpdates.Count > maxTrace)
                    {
                        traceUpdates.Dequeue();
                    }
                }
            }

            bool promotionComplete = (merFromFilter & singletonPromotedFlagMask) != 0;
            bool alreadySlept = false;
            while (!promotionComplete)
            {
                promotionComplete = (((ulong)thisSingletonPartition.entries[filterIdx].key & singletonPromotedFlagMask) != 0);

                if (alreadySlept && !promotionComplete)
                {
                    if (tracing)
                    {
                        // still waiting after at least one pass - dump the recent trace history for diagnosis
                        lock (traceUpdates)
                        {
                            // 'using' guarantees the file is closed even if a write throws
                            using (StreamWriter trace = new StreamWriter("trace.txt"))
                            {
                                foreach (TraceEntry t in traceUpdates)
                                {
                                    trace.WriteLine(t.place + "\t" + t.thread + "\t" + t.partition + "\t" + t.index + "\t" + t.value.ToString("x16"));
                                }
                            }
                        }
                        Console.WriteLine("promotion still not complete after sleep");
                    }
                }

                if (!promotionComplete)
                {
                    Thread.Sleep(100);
                }
                alreadySlept = true;
            }

            UpdateRepeatedMerAfterPromotion(partitionNo, mer, threadNo, mer, addingIncrement);
        }
    }
    finally
    {
        // always release our claim on the table: a flushing thread waits on activeCount,
        // so leaving it elevated after an exception would make that wait never finish
        Interlocked.Decrement(ref thisSingletonPartition.activeCount);
    }
}
/// <summary>
/// Worker-thread entry point: reads batches of reads from a shared file, tiles each read into
/// k-mers, and adds the canonical form of every k-mer pair it can generate to <c>uniquePairs</c>.
/// </summary>
/// <remarks>
/// Reads shorter than <c>2 * merSize</c> are skipped, as are reads that <c>GetDepthsForRead</c>
/// reports as deep-unbalanced. Pair start positions advance by 1 for reads shorter than 200 bases
/// and by 2 otherwise. Per-thread totals are folded into the shared totals with
/// <c>Interlocked.Add</c> when the file is exhausted, and the thread's entry in
/// <c>threadFinished</c> is then signalled.
/// </remarks>
/// <param name="param">A boxed <c>threadParams</c> carrying this worker's thread number and the shared reads file.</param>
static void PairWorker(object param)
{
    threadParams threadParam = (threadParams)param;
    int threadNo = (int)threadParam.threadNo;
    BufferedReader readsFile = threadParam.bufferedReadsFile;
    bool EOF = false;

    // reusable per-thread buffers for one batch of reads
    Sequence[] readHeaderBatch = new Sequence[batchSize];
    Sequence[] readBatch = new Sequence[batchSize];
    for (int i = 0; i < batchSize; i++)
    {
        readHeaderBatch[i] = new Sequence(defaultHeaderLength);
        readBatch[i] = new Sequence(defaultReadLength);
    }

    int readsInBatch = 0;
    long threadReadsRead = 0;
    long threadReadsProcessed = 0;

    // reusable per-read scratch arrays, indexed by k-mer position within the read
    ulong[] mersFromRead = new ulong[1000];
    bool[] merValid = new bool[1000];
    ulong[] canonicalMersFromRead = new ulong[1000];
    int[] plusDepths = new int[1000];
    int[] rcDepths = new int[1000];

    bool deepUnbalanced = false;
    long threadDeepUnbalancedCount = 0;

    int minDepth = averageDepth / 20;

    while (!EOF)
    {
        // the reads file is shared between workers, so fetch each batch under its lock
        lock (readsFile)
        {
            readsInBatch = readsFile.ReadReads(batchSize, readHeaderBatch, readBatch, null, null);
            if (readsInBatch != batchSize)
            {
                // a short batch means the file is exhausted
                EOF = true;
            }
            threadReadsRead += readsInBatch;
        }

        // shared progress counter is updated outside the lock, so it must be an atomic add
        Interlocked.Add(ref progressReadsProcessed, readsInBatch);

        for (int r = 0; r < readsInBatch; r++)
        {
            threadReadsProcessed++;

            Sequence read = readBatch[r];
            int readLength = read.Length;

            if (readLength < 2 * merSize)
            {
                continue;
            }

            // Step between pair start positions. A local drives this thread's loop so that another
            // worker processing a read of a different length cannot change the step mid-read.
            int readStepSize = readLength < 200 ? 1 : 2;
            // the shared field is still written in case code outside this method reads it
            stepSize = readStepSize;

            int merCount = MerStrings.GenerateMersFromRead(read, merSize, ref mersFromRead, ref merValid);

            // mersFromRead/merValid are passed by ref (presumably so they can be replaced by larger
            // arrays for long reads - confirm); keep the sibling scratch arrays at least as large
            if (merCount > canonicalMersFromRead.Length)
            {
                canonicalMersFromRead = new ulong[merCount];
                plusDepths = new int[merCount];
                rcDepths = new int[merCount];
            }

            // canonical form of each valid k-mer is the smaller of it and its reverse complement
            for (int i = 0; i < merCount; i++)
            {
                if (merValid[i])
                {
                    ulong rcMer = MerStrings.ReverseComplement(mersFromRead[i], merSize);
                    if (rcMer < mersFromRead[i])
                    {
                        canonicalMersFromRead[i] = rcMer;
                    }
                    else
                    {
                        canonicalMersFromRead[i] = mersFromRead[i];
                    }
                }
            }

            GetDepthsForRead(merCount, mersFromRead, canonicalMersFromRead, merValid, plusDepths, rcDepths, minDepth, out deepUnbalanced);

            if (deepUnbalanced)
            {
                threadDeepUnbalancedCount++;
                continue;
            }

            ulong pair;
            int pairDepth;
            bool gotPair;

            // generate pairs up to the end of the read (used to only generate from first part)
            // NOTE(review): if pairStride can be smaller than merSize - 1, startingM could run past
            // merCount and read stale merValid entries from an earlier read - confirm against pairStride.
            int startingM = 0;
            int lastM = read.Length - pairStride;

            while (startingM < lastM)
            {
                if (merValid[startingM])
                {
                    gotPair = GeneratePairFromRead(mersFromRead, merValid, plusDepths, rcDepths, startingM, merCount, minDepth, out pair, out pairDepth);

                    if (gotPair)
                    {
                        // store pairs in canonical form as well
                        ulong rcPair = MerStrings.ReverseComplement(pair, 32);
                        if (rcPair < pair)
                        {
                            pair = rcPair;
                        }

                        uniquePairs.AddIfNotPresent(pair, pairDepth, threadNo);
                    }
                }

                startingM += readStepSize;
            }
        }
    }

    // fold this thread's totals into the shared totals and signal completion
    Interlocked.Add(ref totalReadsProcessed, threadReadsProcessed);
    Interlocked.Add(ref totalReadsRead, threadReadsRead);
    Interlocked.Add(ref totalDeepUnbalancedReads, threadDeepUnbalancedCount);

    threadFinished[threadNo].Set();
}