/// <summary>
/// Awakens a list of set indices.
/// </summary>
/// <param name="setIndices">List of set indices to wake up.</param>
/// <param name="threadDispatcher">Thread dispatcher to use when waking the bodies. Pass null to run on a single thread.</param>
public void AwakenSets(ref QuickList<int, Buffer<int>> setIndices, IThreadDispatcher threadDispatcher = null)
{
    QuickList<int, Buffer<int>>.Create(pool.SpecializeFor<int>(), setIndices.Count, out var uniqueSetIndices);
    var uniqueSet = new IndexSet(pool, bodies.Sets.Length);
    AccumulateUniqueIndices(ref setIndices, ref uniqueSet, ref uniqueSetIndices);
    uniqueSet.Dispose(pool);
    //Note that we use the same codepath as multithreading, we just don't use a multithreaded dispatch to execute jobs.
    //TODO: It would probably be a good idea to add a little heuristic to avoid doing multithreaded dispatches if there are only like 5 total bodies.
    //Shouldn't matter too much- the threaded variant should only really be used when doing big batched changes, so having a fixed constant cost isn't that bad.
    int threadCount = threadDispatcher == null ? 1 : threadDispatcher.ThreadCount;
    //Note that direct wakes always reset activity states. I suspect this is sufficiently universal that no one will ever want the alternative,
    //even though the narrowphase does avoid resetting activity states for the sake of faster resleeping when possible.
    var (phaseOneJobCount, phaseTwoJobCount) = PrepareJobs(ref uniqueSetIndices, true, threadCount);
    if (threadCount > 1)
    {
        this.jobIndex = -1;
        this.jobCount = phaseOneJobCount;
        threadDispatcher.DispatchWorkers(phaseOneWorkerDelegate);
    }
    else
    {
        for (int i = 0; i < phaseOneJobCount; ++i)
        {
            ExecutePhaseOneJob(i);
        }
    }
    if (threadCount > 1)
    {
        this.jobIndex = -1;
        this.jobCount = phaseTwoJobCount;
        threadDispatcher.DispatchWorkers(phaseTwoWorkerDelegate);
    }
    else
    {
        for (int i = 0; i < phaseTwoJobCount; ++i)
        {
            ExecutePhaseTwoJob(i);
        }
    }
    DisposeForCompletedAwakenings(ref uniqueSetIndices);
    uniqueSetIndices.Dispose(pool.SpecializeFor<int>());
}
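The phaseOneWorkerDelegate and phaseTwoWorkerDelegate bodies aren't included in this snippet, but the jobIndex = -1 reset before each dispatch implies the usual interlocked job-claiming loop (the benchmark Time method further down uses the same pattern explicitly). A minimal sketch under that assumption:

    //Assumed shape of phaseOneWorkerDelegate's target; not shown in the snippet above.
    void PhaseOneWorker(int workerIndex)
    {
        int job;
        //jobIndex starts at -1, so Interlocked.Increment hands out 0, 1, 2, ...
        //with each claimed index going to exactly one worker.
        while ((job = Interlocked.Increment(ref jobIndex)) < jobCount)
        {
            ExecutePhaseOneJob(job);
        }
    }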
public void Flush(IThreadDispatcher threadDispatcher = null)
{
    var deterministic = threadDispatcher != null && Simulation.Deterministic;
    OnPreflush(threadDispatcher, deterministic);
    //var start = Stopwatch.GetTimestamp();
    flushJobs = new QuickList<NarrowPhaseFlushJob>(128, Pool);
    PairCache.PrepareFlushJobs(ref flushJobs);
    var removalBatchJobCount = ConstraintRemover.CreateFlushJobs(deterministic);
    //Note that we explicitly add the constraint remover jobs here.
    //The constraint remover can be used in two ways- sleeper style, and narrow phase style.
    //In sleeping, we're not actually removing constraints from the simulation completely, so it requires fewer jobs.
    //The constraint remover just lets you choose which jobs to call. The narrow phase needs all of them.
    flushJobs.EnsureCapacity(flushJobs.Count + removalBatchJobCount + 4, Pool);
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });
    if (Solver.ActiveSet.Batches.Count > Solver.FallbackBatchThreshold)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromFallbackBatch });
    }
    for (int i = 0; i < removalBatchJobCount; ++i)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = i });
    }
    if (threadDispatcher == null)
    {
        for (int i = 0; i < flushJobs.Count; ++i)
        {
            ExecuteFlushJob(ref flushJobs[i], Pool);
        }
    }
    else
    {
        flushJobIndex = -1;
        this.threadDispatcher = threadDispatcher;
        threadDispatcher.DispatchWorkers(flushWorkerLoop);
        this.threadDispatcher = null;
    }
    //var end = Stopwatch.GetTimestamp();
    //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
    flushJobs.Dispose(Pool);
    PairCache.Postflush();
    ConstraintRemover.Postflush();
    OnPostflush(threadDispatcher);
}
public void Flush(IThreadDispatcher threadDispatcher = null, bool deterministic = false)
{
    OnPreflush(threadDispatcher, deterministic);
    //var start = Stopwatch.GetTimestamp();
    QuickList<NarrowPhaseFlushJob, Buffer<NarrowPhaseFlushJob>>.Create(Pool.SpecializeFor<NarrowPhaseFlushJob>(), 128, out flushJobs);
    PairCache.PrepareFlushJobs(ref flushJobs);
    //We indirectly pass the determinism state; it's used by the constraint remover bookkeeping.
    this.deterministic = deterministic;
    ConstraintRemover.CreateFlushJobs(ref flushJobs);
    if (threadDispatcher == null)
    {
        for (int i = 0; i < flushJobs.Count; ++i)
        {
            ExecuteFlushJob(ref flushJobs[i]);
        }
    }
    else
    {
        flushJobIndex = -1;
        threadDispatcher.DispatchWorkers(flushWorkerLoop);
    }
    //var end = Stopwatch.GetTimestamp();
    //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
    flushJobs.Dispose(Pool.SpecializeFor<NarrowPhaseFlushJob>());
    PairCache.Postflush();
    ConstraintRemover.Postflush();
    OnPostflush(threadDispatcher);
}
public void Flush(IThreadDispatcher threadDispatcher = null, bool deterministic = false)
{
    OnPreflush(threadDispatcher, deterministic);
    //var start = Stopwatch.GetTimestamp();
    var jobPool = Pool.SpecializeFor<NarrowPhaseFlushJob>();
    QuickList<NarrowPhaseFlushJob, Buffer<NarrowPhaseFlushJob>>.Create(jobPool, 128, out flushJobs);
    PairCache.PrepareFlushJobs(ref flushJobs);
    //We indirectly pass the determinism state; it's used by the constraint remover bookkeeping.
    this.deterministic = deterministic;
    var removalBatchJobCount = ConstraintRemover.CreateFlushJobs();
    //Note that we explicitly add the constraint remover jobs here.
    //The constraint remover can be used in two ways- deactivation style, and narrow phase style.
    //In deactivation, we're not actually removing constraints from the simulation completely, so it requires fewer jobs.
    //The constraint remover just lets you choose which jobs to call. The narrow phase needs all of them.
    flushJobs.EnsureCapacity(flushJobs.Count + removalBatchJobCount + 3, jobPool);
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });
    for (int i = 0; i < removalBatchJobCount; ++i)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = i });
    }
    if (threadDispatcher == null)
    {
        for (int i = 0; i < flushJobs.Count; ++i)
        {
            ExecuteFlushJob(ref flushJobs[i], Pool);
        }
    }
    else
    {
        flushJobIndex = -1;
        this.threadDispatcher = threadDispatcher;
        threadDispatcher.DispatchWorkers(flushWorkerLoop);
        this.threadDispatcher = null;
    }
    //var end = Stopwatch.GetTimestamp();
    //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
    flushJobs.Dispose(Pool.SpecializeFor<NarrowPhaseFlushJob>());
    PairCache.Postflush();
    ConstraintRemover.Postflush();
    OnPostflush(threadDispatcher);
}
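All three Flush variants hand the threaded path to a flushWorkerLoop delegate that isn't included in these snippets. A minimal sketch of what such a worker would look like, assuming the usual interlocked claim pattern; the pool argument is also an assumption, since the snippets only show the single threaded path passing the shared Pool:

    //Assumed sketch of flushWorkerLoop; not shown in the snippets above.
    void FlushWorkerLoop(int workerIndex)
    {
        int jobIndex;
        //flushJobIndex is reset to -1 before dispatch, so the first increment claims job 0.
        while ((jobIndex = Interlocked.Increment(ref flushJobIndex)) < flushJobs.Count)
        {
            //Passing the worker's per-thread memory pool is an assumption; the single
            //threaded path above uses the shared Pool instead.
            ExecuteFlushJob(ref flushJobs[jobIndex], threadDispatcher.GetThreadMemoryPool(workerIndex));
        }
    }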
public static double Time<TDataLayout>(int iterationCount, int flagCount, IThreadDispatcher dispatcher) where TDataLayout : IDataLayout, new()
{
    CacheBlaster.Blast();
    var dataLayout = new TDataLayout();
    dataLayout.Initialize();
    dataLayout.InitializeIteration(flagCount);
    Action<int> executeFunction = workerIndex =>
    {
        int jobIndex;
        while ((jobIndex = Interlocked.Increment(ref globalJobCounter) - 1) < jobs.Length)
        {
            dataLayout.Execute(jobs[jobIndex]);
        }
    };
    globalJobCounter = 0;
    dispatcher.DispatchWorkers(executeFunction); //jit warmup
    dataLayout.Validate(flagCount);
    long time = 0;
    for (int i = 0; i < iterationCount; ++i)
    {
        //Note that individual executions of each approach do not reuse the same memory. The goal is to force cache misses.
        dataLayout.InitializeIteration(flagCount);
        globalJobCounter = 0;
        var start = Stopwatch.GetTimestamp();
        dispatcher.DispatchWorkers(executeFunction);
        var end = Stopwatch.GetTimestamp();
        time += end - start;
        dataLayout.Validate(flagCount);
    }
    dataLayout.Dispose();
    GC.Collect(3, GCCollectionMode.Forced, true);
    return time / (iterationCount * (double)Stopwatch.Frequency);
}
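A call site might look like the following; ArrayOfStructsLayout is a hypothetical stand-in name, since no IDataLayout implementations appear in these snippets. BepuUtilities' ThreadDispatcher is one available IThreadDispatcher implementation:

    //Hypothetical usage; ArrayOfStructsLayout stands in for some IDataLayout implementation.
    var dispatcher = new BepuUtilities.ThreadDispatcher(Environment.ProcessorCount);
    var secondsPerIteration = Time<ArrayOfStructsLayout>(32, 1 << 20, dispatcher);
    Console.WriteLine($"Average time per iteration (s): {secondsPerIteration}");
    dispatcher.Dispose();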
public unsafe void RefitAndRefine(Tree tree, IThreadDispatcher threadDispatcher, int frameIndex, float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
{
    if (tree.leafCount <= 2)
    {
        //If there are two or fewer leaves, then refit/refine/cache optimize doesn't do anything at all.
        //(The root node has no parent, so it does not have a bounding box, and the SAH won't change no matter how we swap the children of the root.)
        //Avoiding this case also gives the other codepath a guarantee that it will be working with nodes with two children.
        return;
    }
    this.threadDispatcher = threadDispatcher;
    Tree = tree;
    //Note that we create per-thread refinement candidates. That's because candidates are found during the multithreaded refit and mark phase, and
    //we don't want to spend the time doing sync work. The candidates are then pruned down to a single target set for the refine pass.
    Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Take(threadDispatcher.ThreadCount, out RefinementCandidates);
    tree.GetRefitAndMarkTuning(out MaximumSubtrees, out var estimatedRefinementCandidateCount, out RefinementLeafCountThreshold);
    //Note that the number of refit nodes is not necessarily bound by MaximumSubtrees. It is just a heuristic estimate. Resizing has to be supported.
    QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), MaximumSubtrees, out RefitNodes);
    //Note that we haven't rigorously guaranteed a refinement count maximum, so it's possible that the workers will need to resize the per-thread refinement candidate lists.
    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        QuickList<int, Buffer<int>>.Create(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>(), estimatedRefinementCandidateCount, out RefinementCandidates[i]);
    }
    int multithreadingLeafCountThreshold = Tree.leafCount / (threadDispatcher.ThreadCount * 2);
    if (multithreadingLeafCountThreshold < RefinementLeafCountThreshold)
    {
        multithreadingLeafCountThreshold = RefinementLeafCountThreshold;
    }
    CollectNodesForMultithreadedRefit(0, multithreadingLeafCountThreshold, ref RefitNodes, RefinementLeafCountThreshold, ref RefinementCandidates[0], threadDispatcher.GetThreadMemoryPool(0).SpecializeFor<int>());
    RefitNodeIndex = -1;
    threadDispatcher.DispatchWorkers(RefitAndMarkAction);
    //Condense the set of candidates into a set of targets.
    int refinementCandidatesCount = 0;
    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        refinementCandidatesCount += RefinementCandidates[i].Count;
    }
    Tree.GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, RefitCostChange, threadDispatcher.ThreadCount, out var targetRefinementCount, out var period, out var offset);
    QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), targetRefinementCount, out RefinementTargets);
    //Note that only a subset of all refinement *candidates* will become refinement *targets*.
    //We start at a semirandom offset and then skip through the set to accumulate targets.
    //The number of candidates that become targets is based on the refinement aggressiveness,
    //tuned by both user input (the scale) and on the volatility of the tree (RefitCostChange).
    var currentCandidatesIndex = 0;
    int index = offset;
    for (int i = 0; i < targetRefinementCount - 1; ++i)
    {
        index += period;
        //Wrap around if the index doesn't fit.
        while (index >= RefinementCandidates[currentCandidatesIndex].Count)
        {
            index -= RefinementCandidates[currentCandidatesIndex].Count;
            ++currentCandidatesIndex;
            if (currentCandidatesIndex >= threadDispatcher.ThreadCount)
            {
                currentCandidatesIndex -= threadDispatcher.ThreadCount;
            }
        }
        Debug.Assert(index < RefinementCandidates[currentCandidatesIndex].Count && index >= 0);
        var nodeIndex = RefinementCandidates[currentCandidatesIndex][index];
        RefinementTargets.AddUnsafely(nodeIndex);
        tree.nodes[nodeIndex].RefineFlag = 1;
    }
    //Note that the root node is only refined if it was not picked as a target earlier.
    if (tree.nodes->RefineFlag != 1)
    {
        RefinementTargets.AddUnsafely(0);
        tree.nodes->RefineFlag = 1;
    }
    RefineIndex = -1;
    threadDispatcher.DispatchWorkers(RefineAction);
    //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
    //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
    var cacheOptimizeCount = Tree.GetCacheOptimizeTuning(MaximumSubtrees, RefitCostChange, (Math.Max(1, threadDispatcher.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);
    var cacheOptimizationTasks = threadDispatcher.ThreadCount * 2;
    PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
    var startIndex = (int)(((long)frameIndex * PerWorkerCacheOptimizeCount) % Tree.nodeCount);
    QuickList<int, Buffer<int>>.Create(Tree.Pool.SpecializeFor<int>(), cacheOptimizationTasks, out CacheOptimizeStarts);
    CacheOptimizeStarts.AddUnsafely(startIndex);
    var optimizationSpacing = Tree.nodeCount / threadDispatcher.ThreadCount;
    var optimizationSpacingWithExtra = optimizationSpacing + 1;
    var optimizationRemainder = Tree.nodeCount - optimizationSpacing * threadDispatcher.ThreadCount;
    for (int i = 1; i < cacheOptimizationTasks; ++i)
    {
        if (optimizationRemainder > 0)
        {
            startIndex += optimizationSpacingWithExtra;
            --optimizationRemainder;
        }
        else
        {
            startIndex += optimizationSpacing;
        }
        if (startIndex >= Tree.nodeCount)
        {
            startIndex -= Tree.nodeCount;
        }
        Debug.Assert(startIndex >= 0 && startIndex < Tree.nodeCount);
        CacheOptimizeStarts.AddUnsafely(startIndex);
    }
    threadDispatcher.DispatchWorkers(CacheOptimizeAction);
    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        //Note the use of the thread memory pool. Each thread allocated their own memory for the list since resizes were possible.
        RefinementCandidates[i].Dispose(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>());
    }
    Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Return(ref RefinementCandidates);
    RefitNodes.Dispose(Tree.Pool.SpecializeFor<int>());
    RefinementTargets.Dispose(Tree.Pool.SpecializeFor<int>());
    CacheOptimizeStarts.Dispose(Tree.Pool.SpecializeFor<int>());
    Tree = null;
    this.threadDispatcher = null;
}
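The strided target selection above walks the per-thread candidate lists as if they were one concatenated, wrapping sequence. As a standalone illustration of just that indexing (a simplified sketch using plain arrays rather than the library's QuickList; it assumes at least one list is nonempty, as the real code guarantees):

    //Simplified sketch: pick every 'period'-th element, starting at 'offset', from a set of
    //per-thread lists treated as one wrapping ring. Mirrors the indexing in RefitAndRefine.
    static List<int> SelectStrided(int[][] perThreadCandidates, int offset, int period, int targetCount)
    {
        var targets = new List<int>(targetCount);
        int listIndex = 0;
        int index = offset;
        for (int i = 0; i < targetCount; ++i)
        {
            //Skip lists until the index lands inside one, wrapping around the set of lists.
            //Terminates because index shrinks each time a nonempty list is passed.
            while (index >= perThreadCandidates[listIndex].Length)
            {
                index -= perThreadCandidates[listIndex].Length;
                listIndex = (listIndex + 1) % perThreadCandidates.Length;
            }
            targets.Add(perThreadCandidates[listIndex][index]);
            index += period;
        }
        return targets;
    }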