/// <summary>
        /// Refits the tree, refines a subset of the refinement candidates gathered during the refit,
        /// and then dispatches a cache optimization pass. The refit (for trees with more than two leaves),
        /// the refinement, and the cache optimization are all executed through <paramref name="looper"/>.
        /// </summary>
        /// <param name="frameIndex">Frame counter. Forwarded to GetRefineTuning and used to choose where this call's cache optimization starts.</param>
        /// <param name="looper">Supplies the thread count and runs the refit, refine, and cache optimize loops.</param>
        /// <param name="context">Working state shared with the worker actions. It is initialized at the start of this call and cleaned up before returning.</param>
        /// <param name="refineAggressivenessScale">Scale forwarded to GetRefineTuning.</param>
        /// <param name="cacheOptimizeAggressivenessScale">Multiplier applied to the thread-count-based scale handed to GetCacheOptimizeTuning.</param>
        /// <returns>The number of refinement targets selected by this call (including node 0 when it had to be appended), or 0 if the tree has no leaves.</returns>
        public unsafe int RefitAndRefine(int frameIndex, IParallelLooper looper, RefitAndRefineMultithreadedContext context, float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
        {
            //Don't proceed if the tree is empty.
            if (leafCount == 0)
            {
                return 0;
            }
            var pool = BufferPools<int>.Locking;

            int estimatedRefinementTargetCount;
            GetRefitAndMarkTuning(out context.MaximumSubtrees, out estimatedRefinementTargetCount, out context.LeafCountThreshold);

            context.Initialize(looper.ThreadCount, estimatedRefinementTargetCount, pool);

            //Collect the refinement candidates.
            if (LeafCount <= 2)
            {
                //Tiny trees skip the looper entirely; every candidate lands in the first candidate list.
                RefitAndMark(context.LeafCountThreshold, ref context.RefinementCandidates.Elements[0]);
            }
            else
            {
                CollectNodesForMultithreadedRefit(looper.ThreadCount, ref context.RefitNodes, context.LeafCountThreshold, ref context.RefinementCandidates.Elements[0]);
                looper.ForLoop(0, looper.ThreadCount, context.RefitAndMarkAction);
            }

            //Candidates are stored in one list per thread; total them up for the tuning heuristic.
            var refinementCandidatesCount = 0;
            for (int i = 0; i < looper.ThreadCount; ++i)
            {
                refinementCandidatesCount += context.RefinementCandidates.Elements[i].Count;
            }

            int targetRefinementCount, period, offset;
            GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, context.RefitCostChange, looper.ThreadCount, out targetRefinementCount, out period, out offset);

            //Condense the set of candidates into a set of targets.
            context.RefinementTargets = new QuickList<int>(pool, BufferPool<int>.GetPoolIndex(targetRefinementCount));

            //Walk the per-thread candidate lists as though they were one concatenated ring, taking every 'period'-th entry.
            //Only targetRefinementCount - 1 candidates are taken here; the remaining slot is left for node 0, handled below.
            int actualRefinementTargetsCount = 0;
            var currentCandidatesIndex = 0;
            int index = offset;
            for (int i = 0; i < targetRefinementCount - 1; ++i)
            {
                index += period;
                //Wrap around if the index doesn't fit.
                while (index >= context.RefinementCandidates.Elements[currentCandidatesIndex].Count)
                {
                    index -= context.RefinementCandidates.Elements[currentCandidatesIndex].Count;
                    ++currentCandidatesIndex;
                    if (currentCandidatesIndex >= context.RefinementCandidates.Count)
                    {
                        currentCandidatesIndex -= context.RefinementCandidates.Count;
                    }
                }
                Debug.Assert(index < context.RefinementCandidates.Elements[currentCandidatesIndex].Count && index >= 0);
                var nodeIndex = context.RefinementCandidates.Elements[currentCandidatesIndex].Elements[index];
                context.RefinementTargets.Elements[actualRefinementTargetsCount++] = nodeIndex;
                nodes[nodeIndex].RefineFlag = 1;
            }
            //The targets were written straight into the backing array, so the list's count has to be set by hand.
            context.RefinementTargets.Count = actualRefinementTargetsCount;
            //Node 0 (presumably the root - confirm against the tree layout) is always refined; append it unless it was already picked.
            if (nodes->RefineFlag != 1)
            {
                context.RefinementTargets.Add(0);
                ++actualRefinementTargetsCount;
                nodes->RefineFlag = 1;
            }

            //Refine all marked targets.
            looper.ForLoop(0, Math.Min(looper.ThreadCount, context.RefinementTargets.Count), context.RefineAction);

            //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
            //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
            var cacheOptimizeCount = GetCacheOptimizeTuning(context.MaximumSubtrees, context.RefitCostChange, (Math.Max(1, looper.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);

            var cacheOptimizationTasks = looper.ThreadCount * 2;
            context.PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
            //The long cast keeps frameIndex * count from overflowing before the modulo.
            var startIndex = (int)(((long)frameIndex * context.PerWorkerCacheOptimizeCount) % nodeCount);
            context.CacheOptimizeStarts.Add(startIndex);

            //Space the starts by the number of tasks, not the number of threads. There are twice as many tasks as threads,
            //so thread-count spacing would walk all the way around the node range after ThreadCount steps and hand the
            //second half of the tasks exactly the same start indices as the first half.
            var optimizationSpacing = nodeCount / cacheOptimizationTasks;
            var optimizationSpacingWithExtra = optimizationSpacing + 1;
            var optimizationRemainder = nodeCount - optimizationSpacing * cacheOptimizationTasks;

            for (int i = 1; i < cacheOptimizationTasks; ++i)
            {
                //The first 'remainder' gaps are one node wider so that the gaps sum to nodeCount.
                if (optimizationRemainder > 0)
                {
                    startIndex += optimizationSpacingWithExtra;
                    --optimizationRemainder;
                }
                else
                {
                    startIndex += optimizationSpacing;
                }
                //A single step never exceeds nodeCount, so one subtraction is enough to wrap.
                if (startIndex >= nodeCount)
                {
                    startIndex -= nodeCount;
                }
                Debug.Assert(startIndex >= 0 && startIndex < nodeCount);
                context.CacheOptimizeStarts.Add(startIndex);
            }

            looper.ForLoop(0, cacheOptimizationTasks, context.CacheOptimizeAction);

            context.CleanUp();
            return actualRefinementTargetsCount;
        }
Esempio n. 2
0
        /// <summary>
        /// Performs a refit of the tree, picks a periodic sample of the resulting refinement candidates to refine,
        /// and finally schedules cache optimization work across the node range. All looped work is run via <paramref name="looper"/>.
        /// </summary>
        /// <param name="frameIndex">Frame counter passed to GetRefineTuning; it also determines the first cache optimization start index.</param>
        /// <param name="looper">Provides ThreadCount and the ForLoop used for the refit, refine, and cache optimize stages.</param>
        /// <param name="context">Per-call working state used by the worker actions; initialized on entry and cleaned up on exit.</param>
        /// <param name="refineAggressivenessScale">Scale passed through to GetRefineTuning.</param>
        /// <param name="cacheOptimizeAggressivenessScale">Factor multiplied into the scale given to GetCacheOptimizeTuning.</param>
        /// <returns>How many refinement targets were selected (counting node 0 if it was appended), or 0 when the tree has no leaves.</returns>
        public unsafe int RefitAndRefine(int frameIndex, IParallelLooper looper, RefitAndRefineMultithreadedContext context, float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
        {
            //Don't proceed if the tree is empty.
            if (leafCount == 0)
            {
                return 0;
            }
            var pool = BufferPools<int>.Locking;

            int estimatedRefinementTargetCount;
            GetRefitAndMarkTuning(out context.MaximumSubtrees, out estimatedRefinementTargetCount, out context.LeafCountThreshold);

            context.Initialize(looper.ThreadCount, estimatedRefinementTargetCount, pool);

            //Collect the refinement candidates.
            if (LeafCount <= 2)
            {
                //With at most two leaves the refit runs directly on the calling thread, filling only candidate list 0.
                RefitAndMark(context.LeafCountThreshold, ref context.RefinementCandidates.Elements[0]);
            }
            else
            {
                CollectNodesForMultithreadedRefit(looper.ThreadCount, ref context.RefitNodes, context.LeafCountThreshold, ref context.RefinementCandidates.Elements[0]);
                looper.ForLoop(0, looper.ThreadCount, context.RefitAndMarkAction);
            }

            //Sum the per-thread candidate lists; the total feeds the refinement tuning.
            var refinementCandidatesCount = 0;
            for (int i = 0; i < looper.ThreadCount; ++i)
            {
                refinementCandidatesCount += context.RefinementCandidates.Elements[i].Count;
            }

            int targetRefinementCount, period, offset;
            GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, context.RefitCostChange, looper.ThreadCount, out targetRefinementCount, out period, out offset);

            //Condense the set of candidates into a set of targets.
            context.RefinementTargets = new QuickList<int>(pool, BufferPool<int>.GetPoolIndex(targetRefinementCount));

            //Step through the candidate lists in order, treating them as a single circular sequence and sampling every 'period' entries.
            //One fewer than targetRefinementCount is sampled because node 0 is added separately afterwards.
            int actualRefinementTargetsCount = 0;
            var currentCandidatesIndex = 0;
            int index = offset;
            for (int i = 0; i < targetRefinementCount - 1; ++i)
            {
                index += period;
                //Wrap around if the index doesn't fit.
                while (index >= context.RefinementCandidates.Elements[currentCandidatesIndex].Count)
                {
                    index -= context.RefinementCandidates.Elements[currentCandidatesIndex].Count;
                    ++currentCandidatesIndex;
                    if (currentCandidatesIndex >= context.RefinementCandidates.Count)
                    {
                        currentCandidatesIndex -= context.RefinementCandidates.Count;
                    }
                }
                Debug.Assert(index < context.RefinementCandidates.Elements[currentCandidatesIndex].Count && index >= 0);
                var nodeIndex = context.RefinementCandidates.Elements[currentCandidatesIndex].Elements[index];
                context.RefinementTargets.Elements[actualRefinementTargetsCount++] = nodeIndex;
                nodes[nodeIndex].RefineFlag = 1;
            }
            //Elements were assigned directly above, bypassing Add, so publish the count explicitly.
            context.RefinementTargets.Count = actualRefinementTargetsCount;
            //Ensure node 0 (presumably the root - verify) is among the targets exactly once.
            if (nodes->RefineFlag != 1)
            {
                context.RefinementTargets.Add(0);
                ++actualRefinementTargetsCount;
                nodes->RefineFlag = 1;
            }

            //Refine all marked targets.
            looper.ForLoop(0, Math.Min(looper.ThreadCount, context.RefinementTargets.Count), context.RefineAction);

            //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
            //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
            var cacheOptimizeCount = GetCacheOptimizeTuning(context.MaximumSubtrees, context.RefitCostChange, (Math.Max(1, looper.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);

            var cacheOptimizationTasks = looper.ThreadCount * 2;
            context.PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
            //Multiply in 64 bits so a large frameIndex cannot overflow before the modulo is applied.
            var startIndex = (int)(((long)frameIndex * context.PerWorkerCacheOptimizeCount) % nodeCount);
            context.CacheOptimizeStarts.Add(startIndex);

            //The start indices must be spread over cacheOptimizationTasks slots. Dividing by the thread count instead
            //makes the accumulated offset reach nodeCount after only ThreadCount steps, so each task in the second half
            //would receive the same start index as its counterpart in the first half.
            var optimizationSpacing = nodeCount / cacheOptimizationTasks;
            var optimizationSpacingWithExtra = optimizationSpacing + 1;
            var optimizationRemainder = nodeCount - optimizationSpacing * cacheOptimizationTasks;

            for (int i = 1; i < cacheOptimizationTasks; ++i)
            {
                //Distribute the division remainder by widening the earliest gaps by one node each.
                if (optimizationRemainder > 0)
                {
                    startIndex += optimizationSpacingWithExtra;
                    --optimizationRemainder;
                }
                else
                {
                    startIndex += optimizationSpacing;
                }
                //Each step is at most nodeCount, so a single subtraction brings the index back in range.
                if (startIndex >= nodeCount)
                {
                    startIndex -= nodeCount;
                }
                Debug.Assert(startIndex >= 0 && startIndex < nodeCount);
                context.CacheOptimizeStarts.Add(startIndex);
            }

            looper.ForLoop(0, cacheOptimizationTasks, context.CacheOptimizeAction);

            context.CleanUp();
            return actualRefinementTargetsCount;
        }