예제 #1
0
        /// <summary>
        /// Wakes the sets referenced by a list of set indices.
        /// </summary>
        /// <param name="setIndices">Indices of the sets that should be awakened.</param>
        /// <param name="threadDispatcher">Dispatcher used to run the wake jobs on multiple workers. Pass null to execute everything on the calling thread.</param>
        public void AwakenSets(ref QuickList <int, Buffer <int> > setIndices, IThreadDispatcher threadDispatcher = null)
        {
            QuickList <int, Buffer <int> > .Create(pool.SpecializeFor <int>(), setIndices.Count, out var distinctSetIndices);

            //The index set is only handed to the accumulation step, so it is released as soon as that step returns.
            var visitedSets = new IndexSet(pool, bodies.Sets.Length);
            AccumulateUniqueIndices(ref setIndices, ref visitedSets, ref distinctSetIndices);
            visitedSets.Dispose(pool);

            //The single threaded path runs exactly the same jobs as the multithreaded one; it simply walks them on the calling thread instead of dispatching.
            //TODO: A small heuristic could skip multithreaded dispatches when only a handful of bodies are involved.
            //It shouldn't matter much- the threaded variant is meant for big batched changes, so a fixed constant cost is tolerable.
            int workerCount = threadDispatcher?.ThreadCount ?? 1;
            bool runInline = workerCount <= 1;

            //Note that direct wakes always reset activity states. That seems universal enough that no one will want the alternative,
            //even though the narrowphase avoids resetting activity states for the sake of faster resleeping when possible.
            var(firstPhaseJobCount, secondPhaseJobCount) = PrepareJobs(ref distinctSetIndices, true, workerCount);

            //Phase one.
            if (runInline)
            {
                int phaseJob = 0;
                while (phaseJob < firstPhaseJobCount)
                {
                    ExecutePhaseOneJob(phaseJob);
                    ++phaseJob;
                }
            }
            else
            {
                //The shared job counter and job count are reset before the workers are released.
                jobIndex = -1;
                jobCount = firstPhaseJobCount;
                threadDispatcher.DispatchWorkers(phaseOneWorkerDelegate);
            }

            //Phase two only begins once every phase one job has been executed.
            if (runInline)
            {
                int phaseJob = 0;
                while (phaseJob < secondPhaseJobCount)
                {
                    ExecutePhaseTwoJob(phaseJob);
                    ++phaseJob;
                }
            }
            else
            {
                jobIndex = -1;
                jobCount = secondPhaseJobCount;
                threadDispatcher.DispatchWorkers(phaseTwoWorkerDelegate);
            }

            DisposeForCompletedAwakenings(ref distinctSetIndices);

            distinctSetIndices.Dispose(pool.SpecializeFor <int>());
        }
예제 #2
0
        /// <summary>
        /// Builds the list of narrow phase flush jobs, executes all of them, and then runs the post-flush steps.
        /// </summary>
        /// <param name="threadDispatcher">Dispatcher used to execute the flush jobs on workers. When null, the jobs run one after another on the calling thread.</param>
        public void Flush(IThreadDispatcher threadDispatcher = null)
        {
            //Deterministic handling is only requested when a dispatcher is supplied and the simulation asks for it.
            bool useDeterministicFlush = threadDispatcher != null && Simulation.Deterministic;
            OnPreflush(threadDispatcher, useDeterministicFlush);

            //var start = Stopwatch.GetTimestamp();
            flushJobs = new QuickList <NarrowPhaseFlushJob>(128, Pool);
            PairCache.PrepareFlushJobs(ref flushJobs);
            int typeBatchRemovalJobCount = ConstraintRemover.CreateFlushJobs(useDeterministicFlush);

            //The constraint remover jobs are added explicitly here.
            //The constraint remover supports two usage styles- sleeper style and narrow phase style.
            //Sleeping does not remove constraints from the simulation completely, so it needs fewer jobs.
            //The remover leaves the choice of jobs to the caller; the narrow phase needs all of them.
            //Reserve room for the type batch jobs plus up to four fixed jobs so the unsafe adds below cannot overrun.
            int requiredCapacity = flushJobs.Count + typeBatchRemovalJobCount + 4;
            flushJobs.EnsureCapacity(requiredCapacity, Pool);

            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });

            //The fallback batch job is only scheduled once the active set has more batches than the fallback threshold.
            bool hasFallbackBatch = Solver.ActiveSet.Batches.Count > Solver.FallbackBatchThreshold;
            if (hasFallbackBatch)
            {
                flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromFallbackBatch });
            }

            int batchJob = 0;
            while (batchJob < typeBatchRemovalJobCount)
            {
                flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = batchJob });
                ++batchJob;
            }

            if (threadDispatcher != null)
            {
                //The dispatcher is only exposed through the field for the duration of the dispatch.
                flushJobIndex         = -1;
                this.threadDispatcher = threadDispatcher;
                threadDispatcher.DispatchWorkers(flushWorkerLoop);
                this.threadDispatcher = null;
            }
            else
            {
                int next = 0;
                while (next < flushJobs.Count)
                {
                    ExecuteFlushJob(ref flushJobs[next], Pool);
                    ++next;
                }
            }
            //var end = Stopwatch.GetTimestamp();
            //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
            flushJobs.Dispose(Pool);

            PairCache.Postflush();
            ConstraintRemover.Postflush();

            OnPostflush(threadDispatcher);
        }
예제 #3
0
        /// <summary>
        /// Collects the pair cache and constraint remover flush jobs, executes them, and then runs the post-flush steps.
        /// </summary>
        /// <param name="threadDispatcher">Dispatcher used to execute the flush jobs on workers. When null, the jobs run one after another on the calling thread.</param>
        /// <param name="deterministic">Determinism flag forwarded to OnPreflush and stored on this instance before the constraint remover jobs are created.</param>
        public void Flush(IThreadDispatcher threadDispatcher = null, bool deterministic = false)
        {
            OnPreflush(threadDispatcher, deterministic);

            //var start = Stopwatch.GetTimestamp();
            QuickList <NarrowPhaseFlushJob, Buffer <NarrowPhaseFlushJob> > .Create(Pool.SpecializeFor <NarrowPhaseFlushJob>(), 128, out flushJobs);
            PairCache.PrepareFlushJobs(ref flushJobs);

            //The determinism state is passed indirectly through the field; the constraint remover bookkeeping uses it.
            this.deterministic = deterministic;
            ConstraintRemover.CreateFlushJobs(ref flushJobs);

            if (threadDispatcher != null)
            {
                //Reset the shared job counter before the workers are released.
                flushJobIndex = -1;
                threadDispatcher.DispatchWorkers(flushWorkerLoop);
            }
            else
            {
                int next = 0;
                while (next < flushJobs.Count)
                {
                    ExecuteFlushJob(ref flushJobs[next]);
                    ++next;
                }
            }
            //var end = Stopwatch.GetTimestamp();
            //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
            flushJobs.Dispose(Pool.SpecializeFor <NarrowPhaseFlushJob>());

            PairCache.Postflush();
            ConstraintRemover.Postflush();

            OnPostflush(threadDispatcher);
        }
예제 #4
0
        /// <summary>
        /// Builds the list of narrow phase flush jobs, executes all of them, and then runs the post-flush steps.
        /// </summary>
        /// <param name="threadDispatcher">Dispatcher used to execute the flush jobs on workers. When null, the jobs run one after another on the calling thread.</param>
        /// <param name="deterministic">Determinism flag forwarded to OnPreflush and stored on this instance before the constraint remover jobs are created.</param>
        public void Flush(IThreadDispatcher threadDispatcher = null, bool deterministic = false)
        {
            OnPreflush(threadDispatcher, deterministic);

            //var start = Stopwatch.GetTimestamp();
            var jobPool = Pool.SpecializeFor <NarrowPhaseFlushJob>();
            QuickList <NarrowPhaseFlushJob, Buffer <NarrowPhaseFlushJob> > .Create(jobPool, 128, out flushJobs);
            PairCache.PrepareFlushJobs(ref flushJobs);

            //The determinism state is passed indirectly through the field; the constraint remover bookkeeping uses it.
            this.deterministic = deterministic;
            int typeBatchRemovalJobCount = ConstraintRemover.CreateFlushJobs();

            //The constraint remover jobs are added explicitly here.
            //The constraint remover supports two usage styles- deactivation style and narrow phase style.
            //Deactivation does not remove constraints from the simulation completely, so it needs fewer jobs.
            //The remover leaves the choice of jobs to the caller; the narrow phase needs all of them.
            //Reserve room for the type batch jobs plus the three fixed jobs so the unsafe adds below cannot overrun.
            int requiredCapacity = flushJobs.Count + typeBatchRemovalJobCount + 3;
            flushJobs.EnsureCapacity(requiredCapacity, jobPool);

            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
            flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });

            int batchJob = 0;
            while (batchJob < typeBatchRemovalJobCount)
            {
                flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = batchJob });
                ++batchJob;
            }

            if (threadDispatcher != null)
            {
                //The dispatcher is only exposed through the field for the duration of the dispatch.
                flushJobIndex         = -1;
                this.threadDispatcher = threadDispatcher;
                threadDispatcher.DispatchWorkers(flushWorkerLoop);
                this.threadDispatcher = null;
            }
            else
            {
                int next = 0;
                while (next < flushJobs.Count)
                {
                    ExecuteFlushJob(ref flushJobs[next], Pool);
                    ++next;
                }
            }
            //var end = Stopwatch.GetTimestamp();
            //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
            flushJobs.Dispose(Pool.SpecializeFor <NarrowPhaseFlushJob>());

            PairCache.Postflush();
            ConstraintRemover.Postflush();

            OnPostflush(threadDispatcher);
        }
예제 #5
0
        /// <summary>
        /// Measures how long the dispatcher takes to run the shared job list against a data layout, averaged over a number of iterations.
        /// </summary>
        /// <typeparam name="TDataLayout">Data layout type to create and execute jobs against.</typeparam>
        /// <param name="iterationCount">Number of timed dispatches to run after the untimed warmup dispatch.</param>
        /// <param name="flagCount">Value forwarded to the layout's InitializeIteration and Validate calls.</param>
        /// <param name="dispatcher">Dispatcher whose workers execute the jobs.</param>
        /// <returns>Accumulated timestamp ticks across the timed dispatches divided by (iterationCount * Stopwatch.Frequency).</returns>
        public static double Time <TDataLayout>(int iterationCount, int flagCount, IThreadDispatcher dispatcher) where TDataLayout : IDataLayout, new()
        {
            CacheBlaster.Blast();

            var layout = new TDataLayout();
            layout.Initialize();
            layout.InitializeIteration(flagCount);

            //Every worker keeps claiming the next job index from the shared counter until the job list is exhausted.
            Action <int> workerLoop = workerIndex =>
            {
                while (true)
                {
                    int claimedJob = Interlocked.Increment(ref globalJobCounter) - 1;
                    if (claimedJob >= jobs.Length)
                    {
                        break;
                    }
                    layout.Execute(jobs[claimedJob]);
                }
            };

            //Untimed first dispatch: jit warmup.
            globalJobCounter = 0;
            dispatcher.DispatchWorkers(workerLoop);
            layout.Validate(flagCount);

            long accumulatedTicks = 0;
            int iteration = 0;
            while (iteration < iterationCount)
            {
                //Note that individual executions of each approach do not reuse the same memory. The goal is to force cache misses.
                layout.InitializeIteration(flagCount);
                globalJobCounter = 0;

                //Only the dispatch itself is inside the timed region; setup and validation are excluded.
                long began = Stopwatch.GetTimestamp();
                dispatcher.DispatchWorkers(workerLoop);
                long finished = Stopwatch.GetTimestamp();
                accumulatedTicks += finished - began;

                layout.Validate(flagCount);
                ++iteration;
            }

            layout.Dispose();
            GC.Collect(3, GCCollectionMode.Forced, true);
            return accumulatedTicks / (iterationCount * (double)Stopwatch.Frequency);
        }
            /// <summary>
            /// Runs three dispatched stages on a tree in sequence: refit and mark, refine, and cache optimize.
            /// Temporary lists are taken from the tree's pool and from the dispatcher's per-thread pools, and are returned before the method exits.
            /// </summary>
            /// <param name="tree">Tree to process. The method returns immediately if it has two or fewer leaves.</param>
            /// <param name="threadDispatcher">Dispatcher whose workers execute each stage. It is dereferenced unconditionally, so it must not be null.</param>
            /// <param name="frameIndex">Frame counter forwarded to GetRefineTuning and used to pick the first cache optimization start index.</param>
            /// <param name="refineAggressivenessScale">Scale forwarded to GetRefineTuning.</param>
            /// <param name="cacheOptimizeAggressivenessScale">Multiplier applied to the thread-count-based factor passed to GetCacheOptimizeTuning.</param>
            public unsafe void RefitAndRefine(Tree tree, IThreadDispatcher threadDispatcher, int frameIndex,
                                              float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
            {
                if (tree.leafCount <= 2)
                {
                    //If there are 2 or fewer leaves, then refit/refine/cache optimize doesn't do anything at all.
                    //(The root node has no parent, so it does not have a bounding box, and the SAH won't change no matter how we swap the children of the root.)
                    //Avoiding this case also gives the other codepath a guarantee that it will be working with nodes with two children.
                    return;
                }
                //Store the dispatcher and tree on the instance for the duration of the call; both are cleared again at the end.
                //NOTE(review): presumably the worker delegates read these fields- confirm in RefitAndMarkAction/RefineAction/CacheOptimizeAction.
                this.threadDispatcher = threadDispatcher;
                Tree = tree;
                //Note that we create per-thread refinement candidates. That's because candidates are found during the multithreaded refit and mark phase, and
                //we don't want to spend the time doing sync work. The candidates are then pruned down to a single target set for the refine pass.
                Tree.Pool.SpecializeFor <QuickList <int, Buffer <int> > >().Take(threadDispatcher.ThreadCount, out RefinementCandidates);
                tree.GetRefitAndMarkTuning(out MaximumSubtrees, out var estimatedRefinementCandidateCount, out RefinementLeafCountThreshold);
                //Note that the number of refit nodes is not necessarily bound by MaximumSubtrees. It is just a heuristic estimate. Resizing has to be supported.
                QuickList <int, Buffer <int> > .Create(tree.Pool.SpecializeFor <int>(), MaximumSubtrees, out RefitNodes);

                //Note that we haven't rigorously guaranteed a refinement count maximum, so it's possible that the workers will need to resize the per-thread refinement candidate lists.
                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    QuickList <int, Buffer <int> > .Create(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor <int>(), estimatedRefinementCandidateCount, out RefinementCandidates[i]);
                }

                //Leaf count threshold handed to CollectNodesForMultithreadedRefit: leaves divided by twice the thread count,
                //clamped so that it never drops below RefinementLeafCountThreshold.
                int multithreadingLeafCountThreshold = Tree.leafCount / (threadDispatcher.ThreadCount * 2);

                if (multithreadingLeafCountThreshold < RefinementLeafCountThreshold)
                {
                    multithreadingLeafCountThreshold = RefinementLeafCountThreshold;
                }
                //The collection pass runs on the calling thread, starting from node 0, using thread 0's candidate list and memory pool.
                CollectNodesForMultithreadedRefit(0, multithreadingLeafCountThreshold, ref RefitNodes, RefinementLeafCountThreshold, ref RefinementCandidates[0],
                                                  threadDispatcher.GetThreadMemoryPool(0).SpecializeFor <int>());

                //Reset the refit counter before releasing the workers.
                //NOTE(review): presumably workers claim entries of RefitNodes by incrementing this- confirm in RefitAndMarkAction.
                RefitNodeIndex = -1;
                threadDispatcher.DispatchWorkers(RefitAndMarkAction);

                //Condense the set of candidates into a set of targets.
                int refinementCandidatesCount = 0;

                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    refinementCandidatesCount += RefinementCandidates[i].Count;
                }
                Tree.GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, RefitCostChange, threadDispatcher.ThreadCount,
                                     out var targetRefinementCount, out var period, out var offset);
                QuickList <int, Buffer <int> > .Create(tree.Pool.SpecializeFor <int>(), targetRefinementCount, out RefinementTargets);

                //Note that only a subset of all refinement *candidates* will become refinement *targets*.
                //We start at a semirandom offset and then skip through the set to accumulate targets.
                //The number of candidates that become targets is based on the refinement aggressiveness,
                //tuned by both user input (the scale) and on the volatility of the tree (RefitCostChange).
                var currentCandidatesIndex = 0;
                int index = offset;

                //Only targetRefinementCount - 1 candidates are picked here; the root may be appended as one more target after the loop.
                //NOTE(review): this relies on the targetRefinementCount passed to Create being the list capacity, since AddUnsafely is used- confirm.
                for (int i = 0; i < targetRefinementCount - 1; ++i)
                {
                    index += period;
                    //Wrap around if the index doesn't fit.
                    //The index is treated as an offset into the concatenation of the per-thread lists; moving past the last list returns to the first.
                    while (index >= RefinementCandidates[currentCandidatesIndex].Count)
                    {
                        index -= RefinementCandidates[currentCandidatesIndex].Count;
                        ++currentCandidatesIndex;
                        if (currentCandidatesIndex >= threadDispatcher.ThreadCount)
                        {
                            currentCandidatesIndex -= threadDispatcher.ThreadCount;
                        }
                    }
                    Debug.Assert(index < RefinementCandidates[currentCandidatesIndex].Count && index >= 0);
                    var nodeIndex = RefinementCandidates[currentCandidatesIndex][index];
                    RefinementTargets.AddUnsafely(nodeIndex);
                    //Flag the node as a refinement target.
                    tree.nodes[nodeIndex].RefineFlag = 1;
                }
                //Note that the root node is only refined if it was not picked as a target earlier.
                if (tree.nodes->RefineFlag != 1)
                {
                    RefinementTargets.AddUnsafely(0);
                    tree.nodes->RefineFlag = 1;
                }


                //Reset the refine counter before releasing the workers.
                //NOTE(review): presumably workers claim entries of RefinementTargets by incrementing this- confirm in RefineAction.
                RefineIndex = -1;
                threadDispatcher.DispatchWorkers(RefineAction);

                //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
                //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
                var cacheOptimizeCount = Tree.GetCacheOptimizeTuning(MaximumSubtrees, RefitCostChange, (Math.Max(1, threadDispatcher.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);

                //Two cache optimization tasks are created per thread; the total optimize count is split evenly between them.
                var cacheOptimizationTasks = threadDispatcher.ThreadCount * 2;

                PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
                //The first start index advances with the frame index and wraps within the node range; the long cast avoids int overflow in the product.
                var startIndex = (int)(((long)frameIndex * PerWorkerCacheOptimizeCount) % Tree.nodeCount);

                QuickList <int, Buffer <int> > .Create(Tree.Pool.SpecializeFor <int>(), cacheOptimizationTasks, out CacheOptimizeStarts);

                CacheOptimizeStarts.AddUnsafely(startIndex);

                //Spacing between consecutive starts is derived from the thread count (not the task count).
                //The first optimizationRemainder steps are one node longer so the integer division remainder is absorbed.
                var optimizationSpacing          = Tree.nodeCount / threadDispatcher.ThreadCount;
                var optimizationSpacingWithExtra = optimizationSpacing + 1;
                var optimizationRemainder        = Tree.nodeCount - optimizationSpacing * threadDispatcher.ThreadCount;

                for (int i = 1; i < cacheOptimizationTasks; ++i)
                {
                    if (optimizationRemainder > 0)
                    {
                        startIndex += optimizationSpacingWithExtra;
                        --optimizationRemainder;
                    }
                    else
                    {
                        startIndex += optimizationSpacing;
                    }
                    //Wrap back into the node range when the start runs past the end.
                    if (startIndex >= Tree.nodeCount)
                    {
                        startIndex -= Tree.nodeCount;
                    }
                    Debug.Assert(startIndex >= 0 && startIndex < Tree.nodeCount);
                    CacheOptimizeStarts.AddUnsafely(startIndex);
                }

                threadDispatcher.DispatchWorkers(CacheOptimizeAction);

                //Release every temporary list to the pool it was allocated from, then clear the fields set at the start of the call.
                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    //Note the use of the thread memory pool. Each thread allocated their own memory for the list since resizes were possible.
                    RefinementCandidates[i].Dispose(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor <int>());
                }
                Tree.Pool.SpecializeFor <QuickList <int, Buffer <int> > >().Return(ref RefinementCandidates);
                RefitNodes.Dispose(Tree.Pool.SpecializeFor <int>());
                RefinementTargets.Dispose(Tree.Pool.SpecializeFor <int>());
                CacheOptimizeStarts.Dispose(Tree.Pool.SpecializeFor <int>());
                Tree = null;
                this.threadDispatcher = null;
            }