Example 1
        void AnalysisWorker(int workerIndex)
        {
            int jobIndex;

            while ((jobIndex = Interlocked.Increment(ref analysisJobIndex)) < analysisJobs.Count)
            {
                DoJob(ref analysisJobs[jobIndex], workerIndex, threadDispatcher.GetThreadMemoryPool(workerIndex));
            }
        }
Example 2
        void FlushWorkerLoop(int workerIndex)
        {
            int jobIndex;
            var threadPool = threadDispatcher.GetThreadMemoryPool(workerIndex);

            while ((jobIndex = Interlocked.Increment(ref flushJobIndex)) < flushJobs.Count)
            {
                ExecuteFlushJob(ref flushJobs[jobIndex], threadPool);
            }
        }
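The two worker loops above share the same lock-free job-claiming idiom: a shared counter starts at -1 (the RefitNodeIndex = -1 and RefineIndex = -1 assignments in the last example follow the same convention), and each Interlocked.Increment atomically hands the caller the next unclaimed job index until the counter passes the job count. Below is a minimal, self-contained sketch of that idiom; the job array and worker body are hypothetical stand-ins for the engine's analysisJobs/flushJobs and DoJob/ExecuteFlushJob.

using System;
using System.Threading;
using System.Threading.Tasks;

class InterlockedJobClaimSketch
{
    //Hypothetical job array; in the examples above this role is played by analysisJobs/flushJobs.
    static readonly int[] jobs = new int[1024];
    //Starts at -1 so that the first Interlocked.Increment returns index 0.
    static int jobIndex = -1;

    static void Worker(int workerIndex)
    {
        int claimed;
        //Each increment atomically claims one unique job index; the loop ends once the counter passes the job count.
        while ((claimed = Interlocked.Increment(ref jobIndex)) < jobs.Length)
        {
            jobs[claimed] = workerIndex; //Stand-in for DoJob/ExecuteFlushJob.
        }
    }

    static void Main()
    {
        var workerCount = Environment.ProcessorCount;
        var workers = new Task[workerCount];
        for (int i = 0; i < workerCount; ++i)
        {
            int workerIndex = i;
            workers[i] = Task.Run(() => Worker(workerIndex));
        }
        Task.WaitAll(workers);
        Console.WriteLine("Every job index was claimed exactly once.");
    }
}

The engine drives loops like these through IThreadDispatcher.DispatchWorkers (as in the last example) rather than Task.Run; the claiming logic is the same.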
Example 3
        private void PrepareOverlapWorkers(IThreadDispatcher threadDispatcher)
        {
            var threadCount = threadDispatcher == null ? 1 : threadDispatcher.ThreadCount;

            //Resizes should be very rare, and having a single extra very small array isn't concerning.
            //(It's not an unmanaged type because it contains nonblittable references.)
            if (overlapWorkers == null || overlapWorkers.Length < threadCount)
            {
                Array.Resize(ref overlapWorkers, threadCount);
            }
            for (int i = 0; i < threadCount; ++i)
            {
                overlapWorkers[i] = new OverlapWorker(i, threadDispatcher != null ? threadDispatcher.GetThreadMemoryPool(i) : Pool, this);
            }
        }
Example 4
        public void Prepare(IThreadDispatcher threadDispatcher = null)
        {
            int maximumConstraintTypeCount = 0, maximumCollisionTypeCount = 0;

            for (int i = 0; i < workerCaches.Count; ++i)
            {
                workerCaches[i].GetMaximumCacheTypeCounts(out var collision, out var constraint);
                if (collision > maximumCollisionTypeCount)
                {
                    maximumCollisionTypeCount = collision;
                }
                if (constraint > maximumConstraintTypeCount)
                {
                    maximumConstraintTypeCount = constraint;
                }
            }
            var minimumSizesPerConstraintType = new QuickList<PreallocationSizes>(maximumConstraintTypeCount, pool);
            var minimumSizesPerCollisionType = new QuickList<PreallocationSizes>(maximumCollisionTypeCount, pool);

            //Since the minimum size accumulation builds the minimum size incrementally, bad data within the array can corrupt the result, so we must clear it.
            minimumSizesPerConstraintType.Span.Clear(0, minimumSizesPerConstraintType.Span.Length);
            minimumSizesPerCollisionType.Span.Clear(0, minimumSizesPerCollisionType.Span.Length);
            for (int i = 0; i < workerCaches.Count; ++i)
            {
                workerCaches[i].AccumulateMinimumSizes(ref minimumSizesPerConstraintType, ref minimumSizesPerCollisionType);
            }

            var threadCount = threadDispatcher != null ? threadDispatcher.ThreadCount : 1;

            //Ensure that the new worker pair caches can hold all workers.
            if (!NextWorkerCaches.Allocated || NextWorkerCaches.Values.Length < threadCount)
            {
                //The next worker caches should never need to be disposed here. The flush should have taken care of it.
#if DEBUG
                for (int i = 0; i < NextWorkerCaches.Count; ++i)
                {
                    Debug.Assert(NextWorkerCaches[i].Equals(default(WorkerPairCache)));
                }
#endif
                Array.Resize(ref NextWorkerCaches.Values, threadCount);
                NextWorkerCaches.Count = threadCount;
            }
            //Note that we have not initialized the workerCaches from the previous frame. In the event that this is the first frame and there are no previous worker caches,
            //there will be no pointers into the caches, and removal analysis loops over the count, which defaults to zero, so it's safe.
            NextWorkerCaches.Count = threadCount;

            var pendingSize = Math.Max(minimumPendingSize, previousPendingSize);
            if (threadDispatcher != null)
            {
                for (int i = 0; i < threadCount; ++i)
                {
                    NextWorkerCaches[i] = new WorkerPairCache(i, threadDispatcher.GetThreadMemoryPool(i), ref minimumSizesPerConstraintType, ref minimumSizesPerCollisionType,
                                                              pendingSize, minimumPerTypeCapacity);
                }
            }
            else
            {
                NextWorkerCaches[0] = new WorkerPairCache(0, pool, ref minimumSizesPerConstraintType, ref minimumSizesPerCollisionType, pendingSize, minimumPerTypeCapacity);
            }
            minimumSizesPerConstraintType.Dispose(pool);
            minimumSizesPerCollisionType.Dispose(pool);

            //Create the pair freshness array for the existing overlaps.
            pool.TakeAtLeast(Mapping.Count, out PairFreshness);
            //This clears 1 byte per pair. 32768 pairs at an assumed 10 GB/s of single-core bandwidth means about 3 microseconds.
            //There is a small chance that multithreading this would be useful in larger simulations, but it would be very, very close.
            PairFreshness.Clear(0, Mapping.Count);
        }
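Prepare's minimum-size accumulation starts from cleared spans precisely because each worker cache folds its requirements into the shared lists incrementally, so any stale values would survive into the result. Below is a schematic, engine-independent sketch of that folding step; plain int arrays stand in for QuickList<PreallocationSizes>, and the fold is assumed here to be a per-type running maximum, which the snippet above does not itself show.

using System;

static class MinimumSizeAccumulationSketch
{
    //Hypothetical stand-in for a worker cache's AccumulateMinimumSizes: fold this worker's
    //per-type requirements into the shared accumulator, assumed to be a running maximum.
    static void Accumulate(int[] accumulated, int[] workerRequirements)
    {
        for (int typeIndex = 0; typeIndex < workerRequirements.Length; ++typeIndex)
        {
            if (workerRequirements[typeIndex] > accumulated[typeIndex])
                accumulated[typeIndex] = workerRequirements[typeIndex];
        }
    }

    static void Main()
    {
        //Accumulator for four cache types, cleared up front just as Prepare clears its spans.
        var accumulated = new int[4];
        Accumulate(accumulated, new[] { 8, 0, 16, 2 });
        Accumulate(accumulated, new[] { 4, 32, 0, 2 });
        Accumulate(accumulated, new[] { 8, 16, 64, 0 });
        Console.WriteLine(string.Join(", ", accumulated)); //Prints 8, 32, 64, 2.
    }
}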
Example 5
            public unsafe void RefitAndRefine(Tree tree, IThreadDispatcher threadDispatcher, int frameIndex,
                                              float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
            {
                if (tree.leafCount <= 2)
                {
                    //If there are two or fewer leaves, then refit/refine/cache optimize doesn't do anything at all.
                    //(The root node has no parent, so it does not have a bounding box, and the SAH won't change no matter how we swap the children of the root.)
                    //Avoiding this case also gives the other codepath a guarantee that it will be working with nodes with two children.
                    return;
                }
                this.threadDispatcher = threadDispatcher;
                Tree = tree;
                //Note that we create per-thread refinement candidates. That's because candidates are found during the multithreaded refit and mark phase, and
                //we don't want to spend the time doing sync work. The candidates are then pruned down to a single target set for the refine pass.
                Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Take(threadDispatcher.ThreadCount, out RefinementCandidates);
                tree.GetRefitAndMarkTuning(out MaximumSubtrees, out var estimatedRefinementCandidateCount, out RefinementLeafCountThreshold);
                //Note that the number of refit nodes is not necessarily bound by MaximumSubtrees. It is just a heuristic estimate. Resizing has to be supported.
                QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), MaximumSubtrees, out RefitNodes);

                //Note that we haven't rigorously guaranteed a refinement count maximum, so it's possible that the workers will need to resize the per-thread refinement candidate lists.
                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    QuickList<int, Buffer<int>>.Create(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>(), estimatedRefinementCandidateCount, out RefinementCandidates[i]);
                }

                int multithreadingLeafCountThreshold = Tree.leafCount / (threadDispatcher.ThreadCount * 2);

                if (multithreadingLeafCountThreshold < RefinementLeafCountThreshold)
                {
                    multithreadingLeafCountThreshold = RefinementLeafCountThreshold;
                }
                CollectNodesForMultithreadedRefit(0, multithreadingLeafCountThreshold, ref RefitNodes, RefinementLeafCountThreshold, ref RefinementCandidates[0],
                                                  threadDispatcher.GetThreadMemoryPool(0).SpecializeFor<int>());

                RefitNodeIndex = -1;
                threadDispatcher.DispatchWorkers(RefitAndMarkAction);

                //Condense the set of candidates into a set of targets.
                int refinementCandidatesCount = 0;

                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    refinementCandidatesCount += RefinementCandidates[i].Count;
                }
                Tree.GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, RefitCostChange, threadDispatcher.ThreadCount,
                                     out var targetRefinementCount, out var period, out var offset);
                QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), targetRefinementCount, out RefinementTargets);

                //Note that only a subset of all refinement *candidates* will become refinement *targets*.
                //We start at a semirandom offset and then skip through the set to accumulate targets.
                //The number of candidates that become targets is based on the refinement aggressiveness,
                //tuned by both user input (the scale) and on the volatility of the tree (RefitCostChange).
                var currentCandidatesIndex = 0;
                int index = offset;

                for (int i = 0; i < targetRefinementCount - 1; ++i)
                {
                    index += period;
                    //Wrap around if the index doesn't fit.
                    while (index >= RefinementCandidates[currentCandidatesIndex].Count)
                    {
                        index -= RefinementCandidates[currentCandidatesIndex].Count;
                        ++currentCandidatesIndex;
                        if (currentCandidatesIndex >= threadDispatcher.ThreadCount)
                        {
                            currentCandidatesIndex -= threadDispatcher.ThreadCount;
                        }
                    }
                    Debug.Assert(index < RefinementCandidates[currentCandidatesIndex].Count && index >= 0);
                    var nodeIndex = RefinementCandidates[currentCandidatesIndex][index];
                    RefinementTargets.AddUnsafely(nodeIndex);
                    tree.nodes[nodeIndex].RefineFlag = 1;
                }
                //Note that the root node is only added as a target here if it was not already picked as one earlier, so the root is always refined.
                if (tree.nodes->RefineFlag != 1)
                {
                    RefinementTargets.AddUnsafely(0);
                    tree.nodes->RefineFlag = 1;
                }


                RefineIndex = -1;
                threadDispatcher.DispatchWorkers(RefineAction);

                //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
                //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
                var cacheOptimizeCount = Tree.GetCacheOptimizeTuning(MaximumSubtrees, RefitCostChange, (Math.Max(1, threadDispatcher.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);

                var cacheOptimizationTasks = threadDispatcher.ThreadCount * 2;

                PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
                var startIndex = (int)(((long)frameIndex * PerWorkerCacheOptimizeCount) % Tree.nodeCount);

                QuickList<int, Buffer<int>>.Create(Tree.Pool.SpecializeFor<int>(), cacheOptimizationTasks, out CacheOptimizeStarts);

                CacheOptimizeStarts.AddUnsafely(startIndex);

                var optimizationSpacing          = Tree.nodeCount / threadDispatcher.ThreadCount;
                var optimizationSpacingWithExtra = optimizationSpacing + 1;
                var optimizationRemainder        = Tree.nodeCount - optimizationSpacing * threadDispatcher.ThreadCount;

                for (int i = 1; i < cacheOptimizationTasks; ++i)
                {
                    if (optimizationRemainder > 0)
                    {
                        startIndex += optimizationSpacingWithExtra;
                        --optimizationRemainder;
                    }
                    else
                    {
                        startIndex += optimizationSpacing;
                    }
                    if (startIndex >= Tree.nodeCount)
                    {
                        startIndex -= Tree.nodeCount;
                    }
                    Debug.Assert(startIndex >= 0 && startIndex < Tree.nodeCount);
                    CacheOptimizeStarts.AddUnsafely(startIndex);
                }

                threadDispatcher.DispatchWorkers(CacheOptimizeAction);

                for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
                {
                    //Note the use of the thread memory pool. Each thread allocated its own memory for the list since resizes were possible.
                    RefinementCandidates[i].Dispose(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>());
                }
                Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Return(ref RefinementCandidates);
                RefitNodes.Dispose(Tree.Pool.SpecializeFor<int>());
                RefinementTargets.Dispose(Tree.Pool.SpecializeFor<int>());
                CacheOptimizeStarts.Dispose(Tree.Pool.SpecializeFor<int>());
                Tree = null;
                this.threadDispatcher = null;
            }
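The target-selection loop in the middle of RefitAndRefine walks the per-thread candidate lists with a fixed period starting at a semirandom offset, wrapping within a list and then on to the next (and back to the first) whenever the index runs off the end. Here is a stripped-down sketch of that traversal; the candidate contents and the stand-ins for targetRefinementCount, period, and offset are made up for illustration.

using System;
using System.Collections.Generic;

static class StridedTargetSelectionSketch
{
    static void Main()
    {
        //Hypothetical per-thread candidate lists; the real ones hold node indices gathered during refit.
        var candidates = new List<int[]>
        {
            new[] { 10, 11, 12, 13 },
            new[] { 20, 21 },
            new[] { 30, 31, 32 },
        };
        //Made-up stand-ins for the values GetRefineTuning would provide.
        int targetCount = 4, period = 2, offset = 1;

        var targets = new List<int>();
        int listIndex = 0, index = offset;
        for (int i = 0; i < targetCount; ++i)
        {
            index += period;
            //Wrap into the following list (cycling back to the first) until the index fits.
            while (index >= candidates[listIndex].Length)
            {
                index -= candidates[listIndex].Length;
                if (++listIndex >= candidates.Count)
                    listIndex = 0;
            }
            targets.Add(candidates[listIndex][index]);
        }
        Console.WriteLine(string.Join(", ", targets)); //Prints 13, 21, 31, 10.
    }
}

Unlike the engine loop, this sketch does not set RefineFlag on selected nodes or force-include the root as a final target.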