/// <summary>
/// Entry point from unmanaged code. Forwards to <see cref="PortableThreadPool.EnsureGateThreadRunning"/>.
/// </summary>
/// <remarks>
/// Debug builds assert that the portable thread pool is in use and that it is not being used for IO.
/// </remarks>
private static void EnsureGateThreadRunning()
{
    // Configuration expected by this entry point.
    Debug.Assert(UsePortableThreadPool);
    Debug.Assert(!UsePortableThreadPoolForIO);

    PortableThreadPool.EnsureGateThreadRunning();
}
/// <summary>
/// Lowers the count of workers processing work by one. If a thread request is outstanding once the count has been
/// lowered, a worker (possibly the calling thread) may be added back.
/// </summary>
private static void RemoveWorkingWorker(PortableThreadPool threadPoolInstance)
{
    // Compare-exchange loop: retry with the freshly observed counts until the decrement is published.
    ThreadCounts expected = threadPoolInstance._separated.counts.VolatileRead();
    for (;;)
    {
        ThreadCounts decremented = expected;
        decremented.SubtractNumProcessingWork(1);

        ThreadCounts witnessed = threadPoolInstance._separated.counts.InterlockedCompareExchange(decremented, expected);
        if (witnessed == expected)
        {
            break;
        }

        expected = witnessed;
    }

    // A request may have arrived after this thread decided there was nothing left to do but before the worker
    // count above was reduced; its notification could then be missed. Re-check for requests and, if there are any,
    // wake up a thread (maybe this one) to service them.
    if (threadPoolInstance._separated.numRequestedWorkers <= 0)
    {
        return;
    }

    MaybeAddWorkingWorker(threadPoolInstance);
}
/// <summary>
/// Lowers the count of workers processing work by one. If a thread request is outstanding once the count has been
/// lowered, a worker (possibly the calling thread) may be added back.
/// </summary>
private static void RemoveWorkingWorker(PortableThreadPool threadPoolInstance)
{
    // A compare-exchange loop is used rather than Interlocked.Decrement or Interlocked.Add as a defensive measure
    // against NumProcessingWork underflowing. See the setter for NumProcessingWork.
    ThreadCounts snapshot = threadPoolInstance._separated.counts;
    bool published;
    do
    {
        ThreadCounts target = snapshot;
        target.NumProcessingWork--;

        ThreadCounts seen = threadPoolInstance._separated.counts.InterlockedCompareExchange(target, snapshot);
        published = seen == snapshot;
        if (!published)
        {
            // Another thread changed the counts first; retry against what it left behind.
            snapshot = seen;
        }
    }
    while (!published);

    // A request may have arrived after this thread decided there was nothing left to do but before the worker
    // count above was reduced; its notification could then be missed. Re-check for requests and, if there are any,
    // wake up a thread (maybe this one) to service them.
    if (threadPoolInstance._separated.numRequestedWorkers > 0)
    {
        MaybeAddWorkingWorker(threadPoolInstance);
    }
}
/// <summary>
/// Creates and starts the gate thread. If creation or start does not complete, <c>gateThreadRunningState</c> is
/// reset to 0 before the failure propagates to the caller.
/// </summary>
private static void CreateGateThread(PortableThreadPool threadPoolInstance)
{
    bool started = false;
    try
    {
        Thread gateThread = new Thread(GateThreadStart, SmallStackSizeBytes);
        gateThread.IsThreadPoolThread = true;
        gateThread.IsBackground = true;
        gateThread.Name = ".NET ThreadPool Gate";

        // Thread pool threads must start in the default execution context without transferring the context,
        // hence UnsafeStart() rather than Start().
        gateThread.UnsafeStart();
        started = true;
    }
    finally
    {
        if (!started)
        {
            Interlocked.Exchange(ref threadPoolInstance._separated.gateThreadRunningState, 0);
        }
    }
}
/// <summary>
/// Decides whether the calling thread should stop processing thread pool work even though work may remain. That is
/// the case only when more threads are processing work than the current thread count goal; when it is, the count of
/// threads processing work is decremented on behalf of the caller.
/// </summary>
/// <returns>
/// <c>true</c> if the caller should stop processing work (its working-worker count has already been removed);
/// otherwise <c>false</c>.
/// </returns>
internal static bool ShouldStopProcessingWorkNow(PortableThreadPool threadPoolInstance)
{
    ThreadCounts observed = threadPoolInstance._separated.counts;
    do
    {
        // More threads processing work than the goal suggests a decision was made to decrease the number of
        // threads; stop processing if the counts can be updated. Having more existing threads than the goal is
        // fine - the cold ones eventually time out unless the goal is raised again. This differs a bit from the
        // original CoreCLR code this was ported from, which turns a processing thread into a retired thread and
        // checks for pending requests like RemoveWorkingWorker. There are no retired threads here, so only the
        // count of threads processing work is considered.
        if (observed.NumProcessingWork <= observed.NumThreadsGoal)
        {
            return false;
        }

        ThreadCounts reduced = observed;
        reduced.NumProcessingWork--;

        ThreadCounts witnessed = threadPoolInstance._separated.counts.InterlockedCompareExchange(reduced, observed);
        if (witnessed == observed)
        {
            return true;
        }

        observed = witnessed;
    }
    while (true);
}
/// <summary>
/// Called by a worker thread. Takes the slow path (<see cref="EnsureRunningSlow"/>) unless the gate thread running
/// state already holds the value for the maximum number of runs.
/// </summary>
internal static void EnsureRunning(PortableThreadPool threadPoolInstance)
{
    // The callers ensure that this speculative load is sufficient to ensure that the gate thread is activated when
    // it is needed.
    bool alreadyFullyArmed =
        threadPoolInstance._separated.gateThreadRunningState == GetRunningStateForNumRuns(MaxRuns);
    if (alreadyFullyArmed)
    {
        return;
    }

    EnsureRunningSlow(threadPoolInstance);
}
/// <summary>
/// Unconditionally sets the gate thread running state to the value for the maximum number of runs and acts on the
/// state that was replaced: signals the run event when the previous state was the zero-runs state, or creates the
/// gate thread when the previous state did not have the running bit set.
/// </summary>
internal static void EnsureRunningSlow(PortableThreadPool threadPoolInstance)
{
    int previousState = Interlocked.Exchange(
        ref threadPoolInstance._separated.gateThreadRunningState,
        GetRunningStateForNumRuns(MaxRuns));

    if (previousState == GetRunningStateForNumRuns(0))
    {
        s_runGateThreadEvent.Set();
        return;
    }

    bool runningBitWasClear = (previousState & GateThreadRunningMask) == 0;
    if (runningBitWasClear)
    {
        CreateGateThread(threadPoolInstance);
    }
}
/// <summary>
/// Lowers the count of workers processing work by one. If a thread request is outstanding once the count has been
/// lowered, a worker (possibly the calling thread) may be added back.
/// </summary>
private static void RemoveWorkingWorker(PortableThreadPool threadPoolInstance)
{
    threadPoolInstance._separated.counts.InterlockedDecrementNumProcessingWork();

    // A request may have arrived after this thread decided there was nothing left to do but before the worker
    // count above was reduced; its notification could then be missed. Re-check for requests and, if there are any,
    // wake up a thread (maybe this one) to service them.
    if (threadPoolInstance._separated.numRequestedWorkers <= 0)
    {
        return;
    }

    MaybeAddWorkingWorker(threadPoolInstance);
}
/// <summary>
/// Attempts to claim one outstanding worker request by atomically decrementing <c>numRequestedWorkers</c>.
/// </summary>
/// <returns><c>true</c> if a request was claimed; <c>false</c> if the observed count was not positive.</returns>
private static bool TakeActiveRequest(PortableThreadPool threadPoolInstance)
{
    for (int observed = threadPoolInstance._separated.numRequestedWorkers; observed > 0;)
    {
        int witnessed = Interlocked.CompareExchange(
            ref threadPoolInstance._separated.numRequestedWorkers,
            observed - 1,
            observed);

        if (witnessed == observed)
        {
            return true;
        }

        // Lost the race; retry against the value another thread left behind.
        observed = witnessed;
    }

    return false;
}
/// <summary>
/// Called by the logic that spawns new worker threads. Reports whether it has been too long since the last dequeue
/// operation; the number of worker threads is taken into account in deciding what "too long" means.
/// </summary>
/// <returns><c>true</c> if the time since the last dequeue exceeds the computed minimum delay.</returns>
private static bool SufficientDelaySinceLastDequeue(PortableThreadPool threadPoolInstance)
{
    uint elapsedMs = (uint)(Environment.TickCount - threadPoolInstance._separated.lastDequeueTime);

    // Below the low CPU utilization mark the fixed gate period is the threshold; otherwise the threshold scales
    // with the thread count goal.
    uint thresholdMs;
    if (threadPoolInstance._cpuUtilization >= CpuUtilizationLow)
    {
        thresholdMs = (uint)threadPoolInstance._separated.numThreadsGoal * DequeueDelayThresholdMs;
    }
    else
    {
        thresholdMs = GateActivitiesPeriodMs;
    }

    return elapsedMs > thresholdMs;
}
/// <summary>
/// Called by the logic that spawns new worker threads. Reports whether it has been too long since the last dequeue
/// operation; the number of worker threads is taken into account in deciding what "too long" means.
/// </summary>
/// <returns><c>true</c> if the time since the last dequeue exceeds the computed minimum delay.</returns>
private static bool SufficientDelaySinceLastDequeue(PortableThreadPool threadPoolInstance)
{
    int elapsedMs = Environment.TickCount - Volatile.Read(ref threadPoolInstance._separated.lastDequeueTime);

    // Below the low CPU utilization mark the fixed gate delay is the threshold; otherwise the threshold scales
    // with the thread count goal.
    int thresholdMs;
    if (threadPoolInstance._cpuUtilization >= CpuUtilizationLow)
    {
        ThreadCounts snapshot = threadPoolInstance._separated.counts.VolatileRead();
        int goal = snapshot.NumThreadsGoal;
        thresholdMs = goal * DequeueDelayThresholdMs;
    }
    else
    {
        thresholdMs = GateThreadDelayMs;
    }

    return elapsedMs > thresholdMs;
}
/// <summary>
/// Lowers the count of workers processing work by one. If a thread request is outstanding once the count has been
/// lowered, a worker (possibly the calling thread) may be added back.
/// </summary>
private static void RemoveWorkingWorker(PortableThreadPool threadPoolInstance)
{
    // Compare-exchange loop. On exit, 'before' holds the counts as they were immediately prior to the decrement.
    ThreadCounts before = threadPoolInstance._separated.counts.VolatileRead();
    for (;;)
    {
        ThreadCounts after = before;
        after.SubtractNumProcessingWork(1);

        ThreadCounts witnessed = threadPoolInstance._separated.counts.InterlockedCompareExchange(after, before);
        if (witnessed == before)
        {
            break;
        }

        before = witnessed;
    }

    bool otherWorkersStillProcessing = before.NumProcessingWork > 1;
    if (otherWorkersStillProcessing)
    {
        // In highly bursty cases with short bursts of work, especially in the portable thread pool implementation,
        // worker threads are released and enter Dispatch very quickly, find little work there, and soon afterwards
        // go back to Dispatch, causing extra thrashing on data and some interlocked operations. When this is not
        // the last thread to stop processing work, a slight delay helps other threads make more efficient
        // progress. The spin-wait is mainly for when the sleep is not effective because there are no other threads
        // to schedule.
        Thread.UninterruptibleSleep0();
        if (!Environment.IsSingleProcessor)
        {
            Thread.SpinWait(1);
        }
    }

    // A request may have arrived after this thread decided there was nothing left to do but before the worker
    // count above was reduced; its notification could then be missed. Re-check for requests and, if there are any,
    // wake up a thread (maybe this one) to service them.
    if (threadPoolInstance._separated.numRequestedWorkers > 0)
    {
        MaybeAddWorkingWorker(threadPoolInstance);
    }
}
/// <summary>
/// Creates and starts the gate thread. If creation or start does not complete, <c>gateThreadRunningState</c> is
/// reset to 0 before the failure propagates to the caller.
/// </summary>
/// <param name="threadPoolInstance">The thread pool whose gate thread running state is reset on failure.</param>
private static void CreateGateThread(PortableThreadPool threadPoolInstance)
{
    bool created = false;
    try
    {
        Thread gateThread = new Thread(GateThreadStart, SmallStackSizeBytes)
        {
            IsThreadPoolThread = true,
            IsBackground = true,
            Name = ".NET ThreadPool Gate"
        };

        // Thread pool threads must start in the default execution context without transferring the context.
        // Start() would capture the ExecutionContext of whichever thread happened to request the gate thread and
        // flow it into this thread, so UnsafeStart() is used instead.
        gateThread.UnsafeStart();
        created = true;
    }
    finally
    {
        if (!created)
        {
            // The thread never started; clear the running state that was set before this call.
            Interlocked.Exchange(ref threadPoolInstance._separated.gateThreadRunningState, 0);
        }
    }
}
/// <summary>
/// Unregisters a wait handle by queueing it for removal, optionally waiting for the unregistration to take effect.
/// </summary>
/// <param name="handle">The wait handle to unregister.</param>
/// <param name="blocking">
/// When <c>true</c>, waits for callbacks if <paramref name="handle"/> is blocking, or otherwise for removal if the
/// handle is pending removal.
/// </param>
private void UnregisterWait(RegisteredWaitHandle handle, bool blocking)
{
    bool removalIsPending = false;

    // TODO: Optimization: Try to unregister wait directly if it isn't being waited on.
    PortableThreadPool pool = ThreadPoolInstance;
    pool._waitThreadLock.Acquire();
    try
    {
        // Only act on a handle that has not already been removed.
        bool stillRegistered = Array.IndexOf(_registeredWaits, handle) != -1;
        if (stillRegistered)
        {
            // Queue it for removal unless it is queued already.
            bool notYetQueued = Array.IndexOf(_pendingRemoves, handle) == -1;
            if (notYetQueued)
            {
                _pendingRemoves[_numPendingRemoves++] = handle;
                _changeHandlesEvent.Set(); // Tell the wait thread that there are changes pending.
            }

            removalIsPending = true;
        }
    }
    finally
    {
        pool._waitThreadLock.Release();
    }

    if (!blocking)
    {
        return;
    }

    if (handle.IsBlocking)
    {
        handle.WaitForCallbacks();
    }
    else if (removalIsPending)
    {
        handle.WaitForRemoval();
    }
}
/// <summary>
/// Adds one working worker if fewer threads are processing work than the thread count goal. The counts are updated
/// atomically, the semaphore is released for the added worker, and a new worker thread is created when the number
/// of existing threads had to grow. If thread creation fails, the counts are rolled back for the threads that were
/// not created.
/// </summary>
internal static void MaybeAddWorkingWorker(PortableThreadPool threadPoolInstance)
{
    ThreadCounts observed = threadPoolInstance._separated.counts;
    short existingBefore, workingBefore, existingAfter, workingAfter;
    for (;;)
    {
        workingBefore = observed.NumProcessingWork;
        if (workingBefore >= observed.NumThreadsGoal)
        {
            // Already at (or above) the goal; nothing to add.
            return;
        }

        workingAfter = (short)(workingBefore + 1);
        existingBefore = observed.NumExistingThreads;
        existingAfter = Math.Max(existingBefore, workingAfter);

        ThreadCounts proposed = observed;
        proposed.NumProcessingWork = workingAfter;
        proposed.NumExistingThreads = existingAfter;

        ThreadCounts witnessed = threadPoolInstance._separated.counts.InterlockedCompareExchange(proposed, observed);
        if (witnessed == observed)
        {
            break;
        }

        observed = witnessed;
    }

    int threadsToCreate = existingAfter - existingBefore;
    int workersToRelease = workingAfter - workingBefore;

    if (workersToRelease > 0)
    {
        s_semaphore.Release(workersToRelease);
    }

    for (; threadsToCreate > 0; threadsToCreate--)
    {
        if (TryCreateWorkerThread())
        {
            continue;
        }

        // TryCreateWorkerThread() returned false: take the threads that were not created back out of both counts,
        // then give up on creating any more.
        observed = threadPoolInstance._separated.counts;
        for (;;)
        {
            ThreadCounts rolledBack = observed;
            rolledBack.NumProcessingWork -= (short)threadsToCreate;
            rolledBack.NumExistingThreads -= (short)threadsToCreate;

            ThreadCounts witnessed = threadPoolInstance._separated.counts.InterlockedCompareExchange(rolledBack, observed);
            if (witnessed == observed)
            {
                break;
            }

            observed = witnessed;
        }

        break;
    }
}
/// <summary>
/// Signals <c>DelayEvent</c> and then makes sure the gate thread is running via <see cref="EnsureRunning"/>.
/// </summary>
public static void Wake(PortableThreadPool threadPoolInstance)
{
    // Signal the delay event first, then ensure the gate thread is running.
    DelayEvent.Set();

    EnsureRunning(threadPoolInstance);
}
/// <summary>
/// Feeds one throughput sample into the hill climbing state and computes the thread count and sample interval to
/// use next.
/// </summary>
/// <param name="currentThreadCount">The thread count in effect for this sample.</param>
/// <param name="sampleDurationSeconds">Duration of this sample, in seconds.</param>
/// <param name="numCompletions">Number of work item completions counted during this sample.</param>
/// <returns>
/// The new thread count and the new sample interval. When the sample is judged too inaccurate, the data is
/// accumulated and <paramref name="currentThreadCount"/> is returned unchanged together with an interval of 10.
/// </returns>
public (int newThreadCount, int newSampleMs) Update(int currentThreadCount, double sampleDurationSeconds, int numCompletions)
{
    //
    // If someone changed the thread count without telling us, update our records accordingly.
    //
    if (currentThreadCount != _lastThreadCount)
    {
        ForceChange(currentThreadCount, StateOrTransition.Initializing);
    }

    //
    // Update the cumulative stats for this thread count
    //
    _secondsElapsedSinceLastChange += sampleDurationSeconds;
    _completionsSinceLastChange += numCompletions;

    //
    // Add in any data we've already collected about this sample
    //
    sampleDurationSeconds += _accumulatedSampleDurationSeconds;
    numCompletions += _accumulatedCompletionCount;

    //
    // We need to make sure we're collecting reasonably accurate data. Since we're just counting the end
    // of each work item, we are going to be missing some data about what really happened during the
    // sample interval. The count produced by each thread includes an initial work item that may have
    // started well before the start of the interval, and each thread may have been running some new
    // work item for some time before the end of the interval, which did not yet get counted. So
    // our count is going to be off by +/- threadCount workitems.
    //
    // The exception is that the thread that reported to us last time definitely wasn't running any work
    // at that time, and the thread that's reporting now definitely isn't running a work item now. So
    // we really only need to consider threadCount-1 threads.
    //
    // Thus the percent error in our count is +/- (threadCount-1)/numCompletions.
    //
    // We cannot rely on the frequency-domain analysis we'll be doing later to filter out this error, because
    // of the way it accumulates over time. If this sample is off by, say, 33% in the negative direction,
    // then the next one likely will be too. The one after that will include the sum of the completions
    // we missed in the previous samples, and so will be 33% positive. So every three samples we'll have
    // two "low" samples and one "high" sample. This will appear as periodic variation right in the frequency
    // range we're targeting, which will not be filtered by the frequency-domain translation.
    //
    if (_totalSamples > 0 && ((currentThreadCount - 1.0) / numCompletions) >= _maxSampleError)
    {
        // not accurate enough yet. Let's accumulate the data so far, and tell the ThreadPool
        // to collect a little more.
        _accumulatedSampleDurationSeconds = sampleDurationSeconds;
        _accumulatedCompletionCount = numCompletions;
        return(currentThreadCount, 10);
    }

    //
    // We've got enough data for our sample; reset our accumulators for next time.
    //
    _accumulatedSampleDurationSeconds = 0;
    _accumulatedCompletionCount = 0;

    //
    // Add the current thread count and throughput sample to our history
    //
    double throughput = numCompletions / sampleDurationSeconds;
    if (NativeRuntimeEventSource.Log.IsEnabled())
    {
        NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadAdjustmentSample(throughput);
    }

    // The history arrays are used as ring buffers indexed by the running sample count.
    int sampleIndex = (int)(_totalSamples % _samplesToMeasure);
    _samples[sampleIndex] = throughput;
    _threadCounts[sampleIndex] = currentThreadCount;
    _totalSamples++;

    //
    // Set up defaults for our metrics
    //
    Complex threadWaveComponent = default;
    Complex throughputWaveComponent = default;
    double throughputErrorEstimate = 0;
    Complex ratio = default;
    double confidence = 0;

    StateOrTransition state = StateOrTransition.Warmup;

    //
    // How many samples will we use? It must be at least the three wave periods we're looking for, and it must also be a whole
    // multiple of the primary wave's period; otherwise the frequency we're looking for will fall between two frequency bands
    // in the Fourier analysis, and we won't be able to measure it accurately.
    //
    int sampleCount = ((int)Math.Min(_totalSamples - 1, _samplesToMeasure)) / _wavePeriod * _wavePeriod;

    if (sampleCount > _wavePeriod)
    {
        //
        // Average the throughput and thread count samples, so we can scale the wave magnitudes later.
        //
        double sampleSum = 0;
        double threadSum = 0;
        for (int i = 0; i < sampleCount; i++)
        {
            sampleSum += _samples[(_totalSamples - sampleCount + i) % _samplesToMeasure];
            threadSum += _threadCounts[(_totalSamples - sampleCount + i) % _samplesToMeasure];
        }
        double averageThroughput = sampleSum / sampleCount;
        double averageThreadCount = threadSum / sampleCount;

        if (averageThroughput > 0 && averageThreadCount > 0)
        {
            //
            // Calculate the periods of the adjacent frequency bands we'll be using to measure noise levels.
            // We want the two adjacent Fourier frequency bands.
            //
            double adjacentPeriod1 = sampleCount / (((double)sampleCount / _wavePeriod) + 1);
            double adjacentPeriod2 = sampleCount / (((double)sampleCount / _wavePeriod) - 1);

            //
            // Get the three different frequency components of the throughput (scaled by average
            // throughput). Our "error" estimate (the amount of noise that might be present in the
            // frequency band we're really interested in) is taken from the adjacent bands: the first
            // adjacent band, or the larger of the two when the second adjacent period fits in the sample count.
            //
            throughputWaveComponent = GetWaveComponent(_samples, sampleCount, _wavePeriod) / averageThroughput;
            throughputErrorEstimate = (GetWaveComponent(_samples, sampleCount, adjacentPeriod1) / averageThroughput).Abs();
            if (adjacentPeriod2 <= sampleCount)
            {
                throughputErrorEstimate = Math.Max(throughputErrorEstimate, (GetWaveComponent(_samples, sampleCount, adjacentPeriod2) / averageThroughput).Abs());
            }

            //
            // Do the same for the thread counts, so we have something to compare to. We don't measure thread count
            // noise, because there is none; these are exact measurements.
            //
            threadWaveComponent = GetWaveComponent(_threadCounts, sampleCount, _wavePeriod) / averageThreadCount;

            //
            // Update our moving average of the throughput noise. We'll use this later as feedback to
            // determine the new size of the thread wave.
            //
            if (_averageThroughputNoise == 0)
            {
                _averageThroughputNoise = throughputErrorEstimate;
            }
            else
            {
                _averageThroughputNoise = (_throughputErrorSmoothingFactor * throughputErrorEstimate) + ((1.0 - _throughputErrorSmoothingFactor) * _averageThroughputNoise);
            }

            if (threadWaveComponent.Abs() > 0)
            {
                //
                // Adjust the throughput wave so it's centered around the target wave, and then calculate the adjusted throughput/thread ratio.
                //
                ratio = (throughputWaveComponent - (_targetThroughputRatio * threadWaveComponent)) / threadWaveComponent;
                state = StateOrTransition.ClimbingMove;
            }
            else
            {
                ratio = new Complex(0, 0);
                state = StateOrTransition.Stabilizing;
            }

            //
            // Calculate how confident we are in the ratio. More noise == less confident. This has
            // the effect of slowing down movements that might be affected by random noise.
            //
            double noiseForConfidence = Math.Max(_averageThroughputNoise, throughputErrorEstimate);
            if (noiseForConfidence > 0)
            {
                confidence = (threadWaveComponent.Abs() / noiseForConfidence) / _targetSignalToNoiseRatio;
            }
            else
            {
                confidence = 1.0; // there is no noise!
            }
        }
    }

    //
    // We use just the real part of the complex ratio we just calculated. If the throughput signal
    // is exactly in phase with the thread signal, this will be the same as taking the magnitude of
    // the complex move and moving that far up. If they're 180 degrees out of phase, we'll move
    // backward (because this indicates that our changes are having the opposite of the intended effect).
    // If they're 90 degrees out of phase, we won't move at all, because we can't tell whether we're
    // having a negative or positive effect on throughput.
    //
    double move = Math.Min(1.0, Math.Max(-1.0, ratio.Real));

    //
    // Apply our confidence multiplier.
    //
    move *= Math.Min(1.0, Math.Max(0.0, confidence));

    //
    // Now apply non-linear gain, such that values around zero are attenuated, while higher values
    // are enhanced. This allows us to move quickly if we're far away from the target, but more slowly
    // if we're getting close, giving us rapid ramp-up without wild oscillations around the target.
    //
    double gain = _maxChangePerSecond * sampleDurationSeconds;
    move = Math.Pow(Math.Abs(move), _gainExponent) * (move >= 0.0 ? 1 : -1) * gain;
    move = Math.Min(move, _maxChangePerSample);

    //
    // If the result was positive, and CPU utilization is above CpuUtilizationHigh (the original comment
    // cites > 95%), refuse the move.
    //
    PortableThreadPool threadPoolInstance = ThreadPoolInstance;
    if (move > 0.0 && threadPoolInstance._cpuUtilization > CpuUtilizationHigh)
    {
        move = 0.0;
    }

    //
    // Apply the move to our control setting
    //
    _currentControlSetting += move;

    //
    // Calculate the new thread wave magnitude, which is based on the moving average we've been keeping of
    // the throughput error. This average starts at zero, so we'll start with a nice safe little wave at first.
    //
    int newThreadWaveMagnitude = (int)(0.5 + (_currentControlSetting * _averageThroughputNoise * _targetSignalToNoiseRatio * _threadMagnitudeMultiplier * 2.0));
    newThreadWaveMagnitude = Math.Min(newThreadWaveMagnitude, _maxThreadWaveMagnitude);
    newThreadWaveMagnitude = Math.Max(newThreadWaveMagnitude, 1);

    //
    // Make sure our control setting is within the ThreadPool's limits
    //
    int maxThreads = threadPoolInstance._maxThreads;
    int minThreads = threadPoolInstance._minThreads;

    _currentControlSetting = Math.Min(maxThreads - newThreadWaveMagnitude, _currentControlSetting);
    _currentControlSetting = Math.Max(minThreads, _currentControlSetting);

    //
    // Calculate the new thread count (control setting + square wave)
    //
    int newThreadCount = (int)(_currentControlSetting + newThreadWaveMagnitude * ((_totalSamples / (_wavePeriod / 2)) % 2));

    //
    // Make sure the new thread count doesn't exceed the ThreadPool's limits
    //
    newThreadCount = Math.Min(maxThreads, newThreadCount);
    newThreadCount = Math.Max(minThreads, newThreadCount);

    //
    // Record these numbers for posterity
    //
    if (NativeRuntimeEventSource.Log.IsEnabled())
    {
        NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadAdjustmentStats(sampleDurationSeconds, throughput, threadWaveComponent.Real, throughputWaveComponent.Real, throughputErrorEstimate, _averageThroughputNoise, ratio.Real, confidence, _currentControlSetting, (ushort)newThreadWaveMagnitude);
    }

    //
    // If all of this caused an actual change in thread count, log that as well.
    //
    if (newThreadCount != currentThreadCount)
    {
        ChangeThreadCount(newThreadCount, state);
    }

    //
    // Return the new thread count and sample interval. This is randomized to prevent correlations with other periodic
    // changes in throughput. Among other things, this prevents us from getting confused by Hill Climbing instances
    // running in other processes.
    // NOTE(review): no randomization happens in this method itself; presumably _currentSampleMs is randomized
    // elsewhere - confirm.
    //
    // If we're at minThreads, and we seem to be hurting performance by going higher, we can't go any lower to fix this. So
    // we'll simply stay at minThreads much longer, and only occasionally try a higher value.
    //
    int newSampleInterval;
    if (ratio.Real < 0.0 && newThreadCount == minThreads)
    {
        newSampleInterval = (int)(0.5 + _currentSampleMs * (10.0 * Math.Min(-ratio.Real, 1.0)));
    }
    else
    {
        newSampleInterval = _currentSampleMs;
    }

    return(newThreadCount, newSampleInterval);
}
/// <summary>
/// Walks the <see cref="_pendingRemoves"/> array and takes each listed registered wait handle out of the
/// <see cref="_registeredWaits"/> and <see cref="_waitHandles"/> arrays, closing the gaps as it goes.
/// </summary>
/// <returns>The number of user waits remaining, read while the wait thread lock is held.</returns>
private int ProcessRemovals()
{
    PortableThreadPool pool = ThreadPoolInstance;
    pool._waitThreadLock.Acquire();
    try
    {
        Debug.Assert(_numPendingRemoves >= 0);
        Debug.Assert(_numPendingRemoves <= _pendingRemoves.Length);
        Debug.Assert(_numUserWaits >= 0);
        Debug.Assert(_numUserWaits <= _registeredWaits.Length);
        Debug.Assert(_numPendingRemoves <= _numUserWaits, $"Num removals {_numPendingRemoves} should be less than or equal to num user waits {_numUserWaits}");

        if (_numPendingRemoves == 0 || _numUserWaits == 0)
        {
            // Nothing to do; hand back the value taken inside the lock for the caller.
            return _numUserWaits;
        }

        int originalNumUserWaits = _numUserWaits;
        int originalNumPendingRemoves = _numPendingRemoves;

        // This is O(N^2), but max(N) = 63 and N will usually be very low
        for (int pendingIndex = 0; pendingIndex < _numPendingRemoves; pendingIndex++)
        {
            RegisteredWaitHandle target = _pendingRemoves[pendingIndex]!;
            int liveWaits = _numUserWaits;

            // Linear search for the handle among the live registered waits.
            int slot = 0;
            while (slot < liveWaits && target != _registeredWaits[slot])
            {
                slot++;
            }
            Debug.Assert(slot < liveWaits);

            target.OnRemoveWait();

            int lastSlot = liveWaits - 1;
            if (slot < lastSlot)
            {
                // Not removing the last element. Due to the possibility of there being duplicate system wait
                // objects in the wait array, perhaps even with different handle values due to the use of
                // DuplicateHandle(), don't reorder handles for fairness. When there are duplicate system wait
                // objects in the wait array and the wait object gets signaled, the system may release the wait
                // in deterministic order based on the order in the wait array. Instead, shift the array.
                int elementsToShift = lastSlot - slot;

                Array.Copy(_registeredWaits, slot + 1, _registeredWaits, slot, elementsToShift);
                _registeredWaits[lastSlot] = null!;

                // Corresponding elements in the wait handles array are shifted up by one
                Array.Copy(_waitHandles, slot + 2, _waitHandles, slot + 1, elementsToShift);
                _waitHandles[liveWaits] = null!;
            }
            else
            {
                // Removing the last element
                _registeredWaits[slot] = null!;
                _waitHandles[slot + 1] = null!;
            }

            _numUserWaits = liveWaits - 1;
            _pendingRemoves[pendingIndex] = null;

            target.Handle.DangerousRelease();
        }

        _numPendingRemoves = 0;

        Debug.Assert(originalNumUserWaits - originalNumPendingRemoves == _numUserWaits, $"{originalNumUserWaits} - {originalNumPendingRemoves} == {_numUserWaits}");
        return _numUserWaits; // return the value taken inside the lock for the caller
    }
    finally
    {
        pool._waitThreadLock.Release();
    }
}
/// <summary>
/// Work item entry point: completes the wait for the stored registered wait handle, passing along the stored
/// timed-out flag.
/// </summary>
void IThreadPoolWorkItem.Execute()
{
    PortableThreadPool.CompleteWait(_registeredWaitHandle, _timedOut);
}
/// <summary>
/// Entry point from unmanaged code. Completes the wait for the stored registered wait handle, passing along the
/// stored timed-out flag.
/// </summary>
/// <remarks>Debug builds assert that the portable thread pool is in use.</remarks>
private void CompleteWait()
{
    Debug.Assert(ThreadPool.UsePortableThreadPool);

    PortableThreadPool.CompleteWait(_registeredWaitHandle, _timedOut);
}
/// <summary>
/// Body of a thread pool worker thread. The thread repeatedly waits on the worker semaphore; each time it is
/// released it takes active requests and dispatches work until there are no more requests or dispatch tells it to
/// stop. When a semaphore wait times out, the thread tries, under the hill climbing thread adjustment lock, to
/// remove itself from the existing-thread count and returns if it succeeds; otherwise it goes back to waiting.
/// </summary>
private static void WorkerThreadStart()
{
    Thread.CurrentThread.SetThreadPoolWorkerThreadName();

    PortableThreadPool threadPoolInstance = ThreadPoolInstance;

    if (PortableThreadPoolEventSource.Log.IsEnabled(EventLevel.Informational, PortableThreadPoolEventSource.Keywords.ThreadingKeyword))
    {
        PortableThreadPoolEventSource.Log.ThreadPoolWorkerThreadStart(
            (uint)threadPoolInstance._separated.counts.VolatileRead().NumExistingThreads);
    }

    LowLevelLock hillClimbingThreadAdjustmentLock = threadPoolInstance._hillClimbingThreadAdjustmentLock;
    LowLevelLifoSemaphore semaphore = s_semaphore;

    // Outer loop: one iteration per "wait until timeout, then try to exit" cycle.
    while (true)
    {
        bool spinWait = true;

        // Inner loop: runs for as long as the semaphore is released before the timeout expires.
        while (semaphore.Wait(ThreadPoolThreadTimeoutMs, spinWait))
        {
            bool alreadyRemovedWorkingWorker = false;
            while (TakeActiveRequest(threadPoolInstance))
            {
                // Record the dequeue time; SufficientDelaySinceLastDequeue() compares against it.
                Volatile.Write(ref threadPoolInstance._separated.lastDequeueTime, Environment.TickCount);
                if (!ThreadPoolWorkQueue.Dispatch())
                {
                    // ShouldStopProcessingWorkNow() caused the thread to stop processing work, and it would have
                    // already removed this working worker in the counts. This typically happens when hill climbing
                    // decreases the worker thread count goal.
                    alreadyRemovedWorkingWorker = true;
                    break;
                }
            }

            // Don't spin-wait on the semaphore next time if the thread was actively stopped from processing work,
            // as it's unlikely that the worker thread count goal would be increased again so soon afterwards that
            // the semaphore would be released within the spin-wait window
            spinWait = !alreadyRemovedWorkingWorker;

            if (!alreadyRemovedWorkingWorker)
            {
                // If we woke up but couldn't find a request, or ran out of work items to process, we need to update
                // the number of working workers to reflect that we are done working for now
                RemoveWorkingWorker(threadPoolInstance);
            }
        }

        hillClimbingThreadAdjustmentLock.Acquire();
        try
        {
            // At this point, the thread's wait timed out. We are shutting down this thread.
            // We are going to decrement the number of existing threads to no longer include this one
            // and then change the max number of threads in the thread pool to reflect that we don't need as many
            // as we had. Finally, we are going to tell hill climbing that we changed the max number of threads.
            ThreadCounts counts = threadPoolInstance._separated.counts.VolatileRead();
            while (true)
            {
                // Since this thread is currently registered as an existing thread, if more work comes in meanwhile,
                // this thread would be expected to satisfy the new work. Ensure that NumExistingThreads is not
                // decreased below NumProcessingWork, as that would be indicative of such a case.
                short numExistingThreads = counts.NumExistingThreads;
                if (numExistingThreads <= counts.NumProcessingWork)
                {
                    // In this case, enough work came in that this thread should not time out and should go back to work.
                    break;
                }

                ThreadCounts newCounts = counts;
                newCounts.SubtractNumExistingThreads(1);
                short newNumExistingThreads = (short)(numExistingThreads - 1);
                // The goal is lowered to the new existing-thread count if it was higher, but never below _minThreads.
                short newNumThreadsGoal = Math.Max(threadPoolInstance._minThreads, Math.Min(newNumExistingThreads, newCounts.NumThreadsGoal));
                newCounts.NumThreadsGoal = newNumThreadsGoal;

                ThreadCounts oldCounts = threadPoolInstance._separated.counts.InterlockedCompareExchange(newCounts, counts);
                if (oldCounts == counts)
                {
                    // The counts were published; notify hill climbing, log the stop event, and exit the thread.
                    // The finally block below still releases the lock.
                    HillClimbing.ThreadPoolHillClimber.ForceChange(newNumThreadsGoal, HillClimbing.StateOrTransition.ThreadTimedOut);

                    if (PortableThreadPoolEventSource.Log.IsEnabled(EventLevel.Informational, PortableThreadPoolEventSource.Keywords.ThreadingKeyword))
                    {
                        PortableThreadPoolEventSource.Log.ThreadPoolWorkerThreadStop((uint)newNumExistingThreads);
                    }
                    return;
                }

                // Lost the race; retry against the counts another thread published.
                counts = oldCounts;
            }
        }
        finally
        {
            hillClimbingThreadAdjustmentLock.Release();
        }
    }
}
/// <summary>
/// Body of the gate thread. While the gate thread is needed, it periodically applies pending thread count
/// adjustments for cooperative blocking, samples CPU utilization, runs runtime-specific gate activities, and
/// raises the thread count goal when queued work appears to be starved of worker threads.
/// </summary>
private static void GateThreadStart()
{
    bool disableStarvationDetection =
        AppContextConfigHelper.GetBooleanConfig("System.Threading.ThreadPool.DisableStarvationDetection", false);
    bool debuggerBreakOnWorkStarvation =
        AppContextConfigHelper.GetBooleanConfig("System.Threading.ThreadPool.DebugBreakOnWorkerStarvation", false);

    // The first reading covers a time range other than the one of interest, so it is discarded apart from
    // being made available to any runtime-specific implementation that may also use the CPU utilization.
    CpuUtilizationReader cpuUtilizationReader = default;
    _ = cpuUtilizationReader.CurrentUtilization;

    PortableThreadPool pool = ThreadPoolInstance;
    LowLevelLock adjustmentLock = pool._threadAdjustmentLock;
    DelayHelper delayHelper = default;

    if (BlockingConfig.IsCooperativeBlockingEnabled)
    {
        // Initialize memory usage and limits, and register to refresh them on gen 2 GCs
        pool.OnGen2GCCallback();
        Gen2GcCallback.Register(pool.OnGen2GCCallback);
    }

    for (;;)
    {
        RunGateThreadEvent.WaitOne();
        int nowMs = Environment.TickCount;
        delayHelper.SetGateActivitiesTime(nowMs);

        for (;;)
        {
            bool wokenBySignal = DelayEvent.WaitOne((int)delayHelper.GetNextDelay(nowMs));
            nowMs = Environment.TickCount;

            //
            // Thread count adjustment for cooperative blocking
            //
            PendingBlockingAdjustment pendingAdjustment = pool._pendingBlockingAdjustment;
            if (pendingAdjustment == PendingBlockingAdjustment.None)
            {
                delayHelper.ClearBlockingAdjustmentDelay();
            }
            else
            {
                bool previousDelayElapsed = false;
                bool keepWaitingForDelay = false;
                if (delayHelper.HasBlockingAdjustmentDelay)
                {
                    previousDelayElapsed = delayHelper.HasBlockingAdjustmentDelayElapsed(nowMs, wokenBySignal);
                    keepWaitingForDelay =
                        pendingAdjustment == PendingBlockingAdjustment.WithDelayIfNecessary && !previousDelayElapsed;
                }

                if (!keepWaitingForDelay)
                {
                    uint nextDelayMs = pool.PerformBlockingAdjustment(previousDelayElapsed);
                    if (nextDelayMs == 0)
                    {
                        delayHelper.ClearBlockingAdjustmentDelay();
                    }
                    else
                    {
                        delayHelper.SetBlockingAdjustmentTimeAndDelay(nowMs, nextDelayMs);
                    }
                }
            }

            //
            // Periodic gate activities
            //
            if (!delayHelper.ShouldPerformGateActivities(nowMs, wokenBySignal))
            {
                continue;
            }

            if (ThreadPool.EnableWorkerTracking && NativeRuntimeEventSource.Log.IsEnabled())
            {
                NativeRuntimeEventSource.Log.ThreadPoolWorkingThreadCount(
                    (uint)pool.GetAndResetHighWatermarkCountOfThreadsProcessingUserCallbacks());
            }

            int cpuUtilization = cpuUtilizationReader.CurrentUtilization;
            pool._cpuUtilization = cpuUtilization;

            bool runtimeNeedsGateThread = ThreadPool.PerformRuntimeSpecificGateActivities(cpuUtilization);

            if (!disableStarvationDetection &&
                pool._pendingBlockingAdjustment == PendingBlockingAdjustment.None &&
                pool._separated.numRequestedWorkers > 0 &&
                SufficientDelaySinceLastDequeue(pool))
            {
                bool shouldAddWorker = false;
                adjustmentLock.Acquire();
                try
                {
                    // Don't add a thread when at the max, or when threads are already in the process of being
                    // added. This differs slightly from the native implementation in CoreCLR because there are
                    // no retired threads here. In the native implementation, threads stopped from processing
                    // work by a hill climbing goal reduction switch to a "retired" state and do not count
                    // towards the equivalent existing thread count. Here the existing thread count includes
                    // every worker thread that has not yet exited, including those stopped by hill climbing, so
                    // the number of threads processing work (not the number of existing threads) is compared
                    // with the goal. There may be alternative solutions; for now this only maintains
                    // consistency in behavior.
                    ThreadCounts snapshot = pool._separated.counts;
                    if (snapshot.NumProcessingWork < pool._maxThreads &&
                        snapshot.NumProcessingWork >= pool._separated.numThreadsGoal)
                    {
                        if (debuggerBreakOnWorkStarvation)
                        {
                            Debugger.Break();
                        }

                        short raisedGoal = (short)(snapshot.NumProcessingWork + 1);
                        pool._separated.numThreadsGoal = raisedGoal;
                        HillClimbing.ThreadPoolHillClimber.ForceChange(
                            raisedGoal,
                            HillClimbing.StateOrTransition.Starvation);
                        shouldAddWorker = true;
                    }
                }
                finally
                {
                    adjustmentLock.Release();
                }

                // The worker is requested only after the adjustment lock has been released.
                if (shouldAddWorker)
                {
                    WorkerThread.MaybeAddWorkingWorker(pool);
                }
            }

            // Leave the inner loop, and go back to waiting on the run event, only when nothing needs the gate
            // thread any longer and the running-state countdown has reached the "zero runs" state.
            if (!runtimeNeedsGateThread &&
                pool._separated.numRequestedWorkers <= 0 &&
                pool._pendingBlockingAdjustment == PendingBlockingAdjustment.None &&
                Interlocked.Decrement(ref pool._separated.gateThreadRunningState) <= GetRunningStateForNumRuns(0))
            {
                break;
            }
        }
    }
}
// Entry point from unmanaged code
/// <summary>
/// Forwards this instance's registered wait handle and timed-out flag to
/// <c>PortableThreadPool.CompleteWait</c>.
/// </summary>
private void CompleteWait() =>
    PortableThreadPool.CompleteWait(_registeredWaitHandle, _timedOut);
/// <summary>
/// Body of a thread pool worker thread. The thread repeatedly waits on the shared worker semaphore, services
/// thread requests by dispatching queued work, and exits once a wait times out and the thread can be removed
/// from the existing-thread count.
/// </summary>
private static void WorkerThreadStart()
{
    Thread.CurrentThread.SetThreadPoolWorkerThreadName();

    PortableThreadPool pool = ThreadPoolInstance;

    if (NativeRuntimeEventSource.Log.IsEnabled())
    {
        NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadStart(
            (uint)pool._separated.counts.VolatileRead().NumExistingThreads);
    }

    LowLevelLock adjustmentLock = pool._threadAdjustmentLock;
    LowLevelLifoSemaphore workerSemaphore = s_semaphore;

    for (;;)
    {
        bool spinBeforeBlocking = true;
        while (workerSemaphore.Wait(ThreadPoolThreadTimeoutMs, spinBeforeBlocking))
        {
            bool stoppedByDispatch = false;
            while (TakeActiveRequest(pool))
            {
                pool._separated.lastDequeueTime = Environment.TickCount;

                bool keepProcessing = ThreadPoolWorkQueue.Dispatch();
                if (!keepProcessing)
                {
                    // ShouldStopProcessingWorkNow() stopped this thread from processing work, and this working
                    // worker was already removed from the counts. This typically happens when hill climbing
                    // lowers the worker thread count goal.
                    stoppedByDispatch = true;
                    break;
                }

                if (pool._separated.numRequestedWorkers <= 0)
                {
                    break;
                }

                // With short, highly bursty work (especially in the portable thread pool implementation), or
                // when the pool is running out of work, worker threads get released, enter Dispatch, find
                // little to do, and come straight back, which thrashes data and some interlocked operations.
                // Since a request for work is pending, delay slightly before serving it. The spin-wait mainly
                // covers the case where the sleep is ineffective because no other thread is ready to be
                // scheduled.
                Thread.UninterruptibleSleep0();
                if (Environment.IsSingleProcessor)
                {
                    continue;
                }

                Thread.SpinWait(1);
            }

            if (stoppedByDispatch)
            {
                // The thread was actively stopped from processing work. The goal is unlikely to be raised again
                // soon enough for the semaphore to be released within the spin-wait window, so skip spinning on
                // the next wait.
                spinBeforeBlocking = false;
            }
            else
            {
                spinBeforeBlocking = true;

                // Either no request was found after waking up, or the work items ran out. Update the number of
                // working workers to reflect that this thread is done working for now.
                RemoveWorkingWorker(pool);
            }
        }

        // The wait timed out, so this thread is a candidate for shutting down. Remove it from the number of
        // existing threads, lower the thread count goal accordingly, and tell hill climbing about the new goal.
        adjustmentLock.Acquire();
        try
        {
            ThreadCounts snapshot = pool._separated.counts;

            // This thread is still registered as an existing thread, so if more work came in meanwhile it would
            // be expected to satisfy it. NumExistingThreads must therefore not drop below NumProcessingWork; when
            // the loop condition fails, enough work came in that this thread should go back to waiting for work
            // instead of timing out.
            while (snapshot.NumExistingThreads > snapshot.NumProcessingWork)
            {
                ThreadCounts proposed = snapshot;
                short remainingThreads = --proposed.NumExistingThreads;
                short goalAfterExit =
                    Math.Max(
                        pool.MinThreadsGoal,
                        Math.Min(remainingThreads, snapshot.NumThreadsGoal));
                proposed.NumThreadsGoal = goalAfterExit;

                ThreadCounts witnessed = pool._separated.counts.InterlockedCompareExchange(proposed, snapshot);
                if (witnessed == snapshot)
                {
                    HillClimbing.ThreadPoolHillClimber.ForceChange(
                        goalAfterExit,
                        HillClimbing.StateOrTransition.ThreadTimedOut);

                    if (NativeRuntimeEventSource.Log.IsEnabled())
                    {
                        NativeRuntimeEventSource.Log.ThreadPoolWorkerThreadStop((uint)remainingThreads);
                    }
                    return;
                }

                // Another thread changed the counts first; retry against the value that was actually there.
                snapshot = witnessed;
            }
        }
        finally
        {
            adjustmentLock.Release();
        }
    }
}
/// <summary>
/// Body of the gate thread. While the gate thread is needed, it wakes up every <c>GateThreadDelayMs</c>
/// milliseconds to sample CPU utilization, run runtime-specific gate activities, and raise the thread count goal
/// when queued work appears to be starved of worker threads.
/// </summary>
private static void GateThreadStart()
{
    bool disableStarvationDetection =
        AppContextConfigHelper.GetBooleanConfig("System.Threading.ThreadPool.DisableStarvationDetection", false);
    bool debuggerBreakOnWorkStarvation =
        AppContextConfigHelper.GetBooleanConfig("System.Threading.ThreadPool.DebugBreakOnWorkerStarvation", false);

    // The first reading is over a time range other than what we are focusing on, so we do not use the read other
    // than to send it to any runtime-specific implementation that may also use the CPU utilization.
    CpuUtilizationReader cpuUtilizationReader = default;
    _ = cpuUtilizationReader.CurrentUtilization;

    PortableThreadPool threadPoolInstance = ThreadPoolInstance;
    LowLevelLock hillClimbingThreadAdjustmentLock = threadPoolInstance._hillClimbingThreadAdjustmentLock;

    while (true)
    {
        s_runGateThreadEvent.WaitOne();

        bool needGateThreadForRuntime;
        do
        {
            Thread.Sleep(GateThreadDelayMs);

            if (ThreadPool.EnableWorkerTracking && PortableThreadPoolEventSource.Log.IsEnabled())
            {
                PortableThreadPoolEventSource.Log.ThreadPoolWorkingThreadCount(
                    (uint)threadPoolInstance.GetAndResetHighWatermarkCountOfThreadsProcessingUserCallbacks());
            }

            int cpuUtilization = cpuUtilizationReader.CurrentUtilization;
            threadPoolInstance._cpuUtilization = cpuUtilization;

            needGateThreadForRuntime = ThreadPool.PerformRuntimeSpecificGateActivities(cpuUtilization);

            if (!disableStarvationDetection &&
                threadPoolInstance._separated.numRequestedWorkers > 0 &&
                SufficientDelaySinceLastDequeue(threadPoolInstance))
            {
                // Acquire the lock before entering the try block so that the finally block can only ever release
                // a lock that was actually taken. With the acquisition inside the try block, a failure while
                // acquiring would make the finally block release a lock this thread does not hold.
                hillClimbingThreadAdjustmentLock.Acquire();
                try
                {
                    ThreadCounts counts = threadPoolInstance._separated.counts.VolatileRead();

                    // Don't add a thread if we're at max or if we are already in the process of adding threads.
                    // This logic is slightly different from the native implementation in CoreCLR because there are
                    // no retired threads. In the native implementation, when hill climbing reduces the thread count
                    // goal, threads that are stopped from processing work are switched to "retired" state, and they
                    // don't count towards the equivalent existing thread count. In this implementation, the
                    // existing thread count includes any worker thread that has not yet exited, including those
                    // stopped from working by hill climbing, so here the number of threads processing work, instead
                    // of the number of existing threads, is compared with the goal. There may be alternative
                    // solutions, for now this is only to maintain consistency in behavior.
                    while (
                        counts.NumExistingThreads < threadPoolInstance._maxThreads &&
                        counts.NumProcessingWork >= counts.NumThreadsGoal)
                    {
                        if (debuggerBreakOnWorkStarvation)
                        {
                            Debugger.Break();
                        }

                        ThreadCounts newCounts = counts;
                        short newNumThreadsGoal = (short)(counts.NumProcessingWork + 1);
                        newCounts.NumThreadsGoal = newNumThreadsGoal;

                        ThreadCounts oldCounts =
                            threadPoolInstance._separated.counts.InterlockedCompareExchange(newCounts, counts);
                        if (oldCounts == counts)
                        {
                            HillClimbing.ThreadPoolHillClimber.ForceChange(
                                newNumThreadsGoal,
                                HillClimbing.StateOrTransition.Starvation);
                            WorkerThread.MaybeAddWorkingWorker(threadPoolInstance);
                            break;
                        }

                        // The counts changed underneath us; re-evaluate the conditions against the latest value.
                        counts = oldCounts;
                    }
                }
                finally
                {
                    hillClimbingThreadAdjustmentLock.Release();
                }
            }
        } while (
            // Keep iterating while the runtime still needs the gate thread, while worker requests are outstanding,
            // or while the running-state countdown has not yet reached the "zero runs" state.
            needGateThreadForRuntime ||
            threadPoolInstance._separated.numRequestedWorkers > 0 ||
            Interlocked.Decrement(ref threadPoolInstance._separated.gateThreadRunningState) > GetRunningStateForNumRuns(0));
    }
}