public void Flush(IThreadDispatcher threadDispatcher = null)
{
    var deterministic = threadDispatcher != null && Simulation.Deterministic;
    OnPreflush(threadDispatcher, deterministic);
    //var start = Stopwatch.GetTimestamp();
    flushJobs = new QuickList<NarrowPhaseFlushJob>(128, Pool);
    PairCache.PrepareFlushJobs(ref flushJobs);
    var removalBatchJobCount = ConstraintRemover.CreateFlushJobs(deterministic);
    //Note that we explicitly add the constraint remover jobs here.
    //The constraint remover can be used in two ways- sleeper style, and narrow phase style.
    //In sleeping, we're not actually removing constraints from the simulation completely, so it requires fewer jobs.
    //The constraint remover just lets you choose which jobs to call. The narrow phase needs all of them.
    flushJobs.EnsureCapacity(flushJobs.Count + removalBatchJobCount + 4, Pool);
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });
    if (Solver.ActiveSet.Batches.Count > Solver.FallbackBatchThreshold)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromFallbackBatch });
    }
    for (int i = 0; i < removalBatchJobCount; ++i)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = i });
    }

    if (threadDispatcher == null)
    {
        for (int i = 0; i < flushJobs.Count; ++i)
        {
            ExecuteFlushJob(ref flushJobs[i], Pool);
        }
    }
    else
    {
        flushJobIndex = -1;
        this.threadDispatcher = threadDispatcher;
        threadDispatcher.DispatchWorkers(flushWorkerLoop);
        this.threadDispatcher = null;
    }
    //var end = Stopwatch.GetTimestamp();
    //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
    flushJobs.Dispose(Pool);

    PairCache.Postflush();
    ConstraintRemover.Postflush();

    OnPostflush(threadDispatcher);
}
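The dispatched path above only works if flushWorkerLoop hands out job indices atomically; its body isn't shown in this excerpt, but resetting flushJobIndex to -1 before DispatchWorkers implies the usual increment-to-claim pattern. A minimal sketch under that assumption (the loop body and the per-worker pool choice are guesses, not the library's confirmed implementation):

//Sketch only: each worker repeatedly claims the next job index with an atomic increment until the list is exhausted.
void FlushWorkerLoop(int workerIndex)
{
    int jobIndex;
    //Interlocked.Increment returns the incremented value, so starting from -1 yields 0, 1, 2, ...
    while ((jobIndex = Interlocked.Increment(ref flushJobIndex)) < flushJobs.Count)
    {
        //Assumption: workers use their per-thread pools here; the single threaded path above uses Pool directly.
        ExecuteFlushJob(ref flushJobs[jobIndex], threadDispatcher.GetThreadMemoryPool(workerIndex));
    }
}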
public unsafe void ExtractLines(ref BallSocketPrestepData prestepBundle, int innerIndex, int setIndex, int* bodyIndices,
    Bodies bodies, ref Vector3 tint, ref QuickList<LineInstance, Array<LineInstance>> lines)
{
    //Could do bundles of constraints at a time, but eh.
    var poseA = bodies.Sets[setIndex].Poses[bodyIndices[0]];
    var poseB = bodies.Sets[setIndex].Poses[bodyIndices[1]];
    Vector3Wide.ReadSlot(ref prestepBundle.LocalOffsetA, innerIndex, out var localOffsetA);
    Vector3Wide.ReadSlot(ref prestepBundle.LocalOffsetB, innerIndex, out var localOffsetB);
    Quaternion.Transform(localOffsetA, poseA.Orientation, out var worldOffsetA);
    Quaternion.Transform(localOffsetB, poseB.Orientation, out var worldOffsetB);
    var endA = poseA.Position + worldOffsetA;
    var endB = poseB.Position + worldOffsetB;
    var color = new Vector3(0.2f, 0.2f, 1f) * tint;
    var packedColor = Helpers.PackColor(color);
    var backgroundColor = new Vector3(0f, 0f, 1f) * tint;
    var lineA = new LineInstance(poseA.Position, endA, packedColor, 0);
    var lineB = new LineInstance(poseB.Position, endB, packedColor, 0);
    lines.AddUnsafely(ref lineA);
    lines.AddUnsafely(ref lineB);
    var errorColor = new Vector3(1, 0, 0) * tint;
    var packedErrorColor = Helpers.PackColor(errorColor);
    var errorLine = new LineInstance(endA, endB, packedErrorColor, 0);
    lines.AddUnsafely(ref errorLine);
}
public void Flush(IThreadDispatcher threadDispatcher = null, bool deterministic = false)
{
    OnPreflush(threadDispatcher, deterministic);
    //var start = Stopwatch.GetTimestamp();
    var jobPool = Pool.SpecializeFor<NarrowPhaseFlushJob>();
    QuickList<NarrowPhaseFlushJob, Buffer<NarrowPhaseFlushJob>>.Create(jobPool, 128, out flushJobs);
    PairCache.PrepareFlushJobs(ref flushJobs);
    //We indirectly pass the determinism state; it's used by the constraint remover bookkeeping.
    this.deterministic = deterministic;
    var removalBatchJobCount = ConstraintRemover.CreateFlushJobs();
    //Note that we explicitly add the constraint remover jobs here.
    //The constraint remover can be used in two ways- deactivation style, and narrow phase style.
    //In deactivation, we're not actually removing constraints from the simulation completely, so it requires fewer jobs.
    //The constraint remover just lets you choose which jobs to call. The narrow phase needs all of them.
    flushJobs.EnsureCapacity(flushJobs.Count + removalBatchJobCount + 3, jobPool);
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintsFromBodyLists });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.ReturnConstraintHandles });
    flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromBatchReferencedHandles });
    for (int i = 0; i < removalBatchJobCount; ++i)
    {
        flushJobs.AddUnsafely(new NarrowPhaseFlushJob { Type = NarrowPhaseFlushJobType.RemoveConstraintFromTypeBatch, Index = i });
    }

    if (threadDispatcher == null)
    {
        for (int i = 0; i < flushJobs.Count; ++i)
        {
            ExecuteFlushJob(ref flushJobs[i], Pool);
        }
    }
    else
    {
        flushJobIndex = -1;
        this.threadDispatcher = threadDispatcher;
        threadDispatcher.DispatchWorkers(flushWorkerLoop);
        this.threadDispatcher = null;
    }
    //var end = Stopwatch.GetTimestamp();
    //Console.WriteLine($"Flush stage 3 time (us): {1e6 * (end - start) / Stopwatch.Frequency}");
    flushJobs.Dispose(Pool.SpecializeFor<NarrowPhaseFlushJob>());

    PairCache.Postflush();
    ConstraintRemover.Postflush();

    OnPostflush(threadDispatcher);
}
unsafe void CollectSubtreesForNodeDirect(int nodeIndex, int remainingDepth,
    ref QuickList<int> subtrees, ref QuickQueue<int> internalNodes, out float treeletCost)
{
    internalNodes.EnqueueUnsafely(nodeIndex);

    treeletCost = 0;
    var node = nodes + nodeIndex;
    var children = &node->A;
    --remainingDepth;
    if (remainingDepth >= 0)
    {
        for (int i = 0; i < 2; ++i)
        {
            ref var child = ref children[i];
            if (child.Index >= 0)
            {
                treeletCost += ComputeBoundsMetric(ref child.Min, ref child.Max);
                float childCost;
                CollectSubtreesForNodeDirect(child.Index, remainingDepth, ref subtrees, ref internalNodes, out childCost);
                treeletCost += childCost;
            }
            else
            {
                //It's a leaf, immediately add it to subtrees.
                subtrees.AddUnsafely(child.Index);
            }
        }
    }
    else
    {
        //(The original excerpt is cut off here; assumed completion: once the depth budget is exhausted,
        //the children themselves become subtree roots, whether they are internal nodes or leaves.)
        for (int i = 0; i < 2; ++i)
        {
            subtrees.AddUnsafely(children[i].Index);
        }
    }
}
public TextBuilder Append(string text, int start, int count)
{
    characters.EnsureCapacity(characters.Count + text.Length, new PassthroughArrayPool<char>());
    int end = start + count;
    for (int i = start; i < end; ++i)
    {
        characters.AddUnsafely(text[i]);
    }
    return this;
}
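Append relies on the same contract every snippet here does: EnsureCapacity reserves room for everything that will be appended, so the subsequent AddUnsafely calls can skip per-element capacity checks. A minimal sketch of that reserve-then-add pattern in isolation (AppendRange is a hypothetical helper, not part of the library):

//Sketch of the reserve-then-AddUnsafely contract; the caller must guarantee capacity before AddUnsafely.
static void AppendRange(ref QuickList<int> list, ReadOnlySpan<int> values, BufferPool pool)
{
    list.EnsureCapacity(list.Count + values.Length, pool);
    for (int i = 0; i < values.Length; ++i)
    {
        list.AddUnsafely(values[i]);
    }
}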
/// <summary>
/// Wakes up all bodies and constraints within a set. Doesn't do anything if the set is awake (index zero).
/// </summary>
/// <param name="setIndex">Index of the set to awaken.</param>
public void AwakenSet(int setIndex)
{
    if (setIndex > 0)
    {
        ValidateSleepingSetIndex(setIndex);
        //TODO: Some fairly pointless work here- spans or other approaches could help with the API.
        var list = new QuickList<int>(1, pool);
        list.AddUnsafely(setIndex);
        AwakenSets(ref list);
        list.Dispose(pool);
    }
}
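For context, a hedged usage sketch from the caller's side, assuming this method is exposed to user code as simulation.Awakener.AwakenSet and that each BodySet slot exposes an Allocated flag for pooled-but-unused entries (both assumptions; adjust to the actual API surface):

//Usage sketch: wake every sleeping island. Set index 0 is the active set, so start at 1.
static void WakeEverything(Simulation simulation)
{
    for (int setIndex = 1; setIndex < simulation.Bodies.Sets.Length; ++setIndex)
    {
        if (simulation.Bodies.Sets[setIndex].Allocated)
        {
            simulation.Awakener.AwakenSet(setIndex);
        }
    }
}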
public unsafe void ExtractLines(ref BallSocketPrestepData prestepBundle, int innerIndex, BodyLocation* bodyLocations,
    Bodies bodies, ref QuickList<LineInstance, Array<LineInstance>> lines)
{
    //Could do bundles of constraints at a time, but eh.
    var poseA = bodies.Sets[bodyLocations[0].SetIndex].Poses[bodyLocations[0].Index];
    var poseB = bodies.Sets[bodyLocations[1].SetIndex].Poses[bodyLocations[1].Index];
    Vector3Wide.GetLane(ref prestepBundle.LocalOffsetA, innerIndex, out var localOffsetA);
    Vector3Wide.GetLane(ref prestepBundle.LocalOffsetB, innerIndex, out var localOffsetB);
    Quaternion.Transform(ref localOffsetA, ref poseA.Orientation, out var worldOffsetA);
    Quaternion.Transform(ref localOffsetB, ref poseB.Orientation, out var worldOffsetB);
    var endA = poseA.Position + worldOffsetA;
    var endB = poseB.Position + worldOffsetB;
    var color = new Vector3(0.2f, 0.2f, 1f);
    var lineA = new LineInstance(ref poseA.Position, ref endA, ref color);
    var lineB = new LineInstance(ref poseB.Position, ref endB, ref color);
    lines.AddUnsafely(ref lineA);
    lines.AddUnsafely(ref lineB);
    var errorColor = new Vector3(1, 0, 0);
    var errorLine = new LineInstance(ref endA, ref endB, ref errorColor);
    lines.AddUnsafely(ref errorLine);
}
public bool TryClaim(int index)
{
    var preclaimValue = Interlocked.CompareExchange(ref ClaimStates[index], ClaimIdentity, 0);
    if (preclaimValue == 0)
    {
        Debug.Assert(WorkerClaims.Count < WorkerClaims.Span.Length,
            "The claim enumerator should never be invoked if the worker claims buffer is too small to hold all the bodies.");
        WorkerClaims.AddUnsafely(index);
    }
    else if (preclaimValue != ClaimIdentity)
    {
        //Note that it only fails when it's both nonzero AND not equal to the claim identity. It means it's claimed by a different worker.
        return false;
    }
    return true;
}
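TryClaim records every successful claim in WorkerClaims so the worker can undo its claims later; the release side is not part of this excerpt. A sketch of a plausible counterpart, using the same fields (the Unclaim method itself is hypothetical):

//Hypothetical counterpart to TryClaim: releases everything this worker successfully claimed.
//Volatile.Write keeps the store ordering explicit even though release happens outside the contended phase.
public void Unclaim()
{
    for (int i = 0; i < WorkerClaims.Count; ++i)
    {
        Volatile.Write(ref ClaimStates[WorkerClaims[i]], 0);
    }
    WorkerClaims.Count = 0;
}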
public unsafe void Insert(Node* node, Node* nodes, ref QuickList<int> subtrees)
{
    var children = &node->A;
    for (int childIndex = 0; childIndex < 2; ++childIndex)
    {
        ref var child = ref children[childIndex];
        if (child.Index >= 0)
        {
            int index = Count;
            var cost = Tree.ComputeBoundsMetric(ref child.Min, ref child.Max);// - node->PreviousMetric;
            ++Count;

            //Sift up.
            while (index > 0)
            {
                var parentIndex = (index - 1) >> 1;
                var parent = Entries + parentIndex;
                if (parent->Cost < cost)
                {
                    //Pull the parent down.
                    Entries[index] = *parent;
                    index = parentIndex;
                }
                else
                {
                    //Found the insertion spot.
                    break;
                }
            }
            var entry = Entries + index;
            entry->Index = child.Index;
            entry->Cost = cost;
        }
        else
        {
            //Immediately add leaf nodes.
            subtrees.AddUnsafely(child.Index);
        }
    }
}
public unsafe void RefitAndRefine(Tree tree, IThreadDispatcher threadDispatcher, int frameIndex,
    float refineAggressivenessScale = 1, float cacheOptimizeAggressivenessScale = 1)
{
    if (tree.leafCount <= 2)
    {
        //If there are 2 or fewer leaves, then refit/refine/cache optimize doesn't do anything at all.
        //(The root node has no parent, so it does not have a bounding box, and the SAH won't change no matter how we swap the children of the root.)
        //Avoiding this case also gives the other codepath a guarantee that it will be working with nodes with two children.
        return;
    }
    this.threadDispatcher = threadDispatcher;
    Tree = tree;
    //Note that we create per-thread refinement candidates. That's because candidates are found during the multithreaded refit and mark phase, and
    //we don't want to spend the time doing sync work. The candidates are then pruned down to a single target set for the refine pass.
    Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Take(threadDispatcher.ThreadCount, out RefinementCandidates);
    tree.GetRefitAndMarkTuning(out MaximumSubtrees, out var estimatedRefinementCandidateCount, out RefinementLeafCountThreshold);
    //Note that the number of refit nodes is not necessarily bound by MaximumSubtrees. It is just a heuristic estimate. Resizing has to be supported.
    QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), MaximumSubtrees, out RefitNodes);
    //Note that we haven't rigorously guaranteed a refinement count maximum, so it's possible that the workers will need to resize the per-thread refinement candidate lists.
    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        QuickList<int, Buffer<int>>.Create(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>(), estimatedRefinementCandidateCount, out RefinementCandidates[i]);
    }
    int multithreadingLeafCountThreshold = Tree.leafCount / (threadDispatcher.ThreadCount * 2);
    if (multithreadingLeafCountThreshold < RefinementLeafCountThreshold)
    {
        multithreadingLeafCountThreshold = RefinementLeafCountThreshold;
    }
    CollectNodesForMultithreadedRefit(0, multithreadingLeafCountThreshold, ref RefitNodes, RefinementLeafCountThreshold,
        ref RefinementCandidates[0], threadDispatcher.GetThreadMemoryPool(0).SpecializeFor<int>());

    RefitNodeIndex = -1;
    threadDispatcher.DispatchWorkers(RefitAndMarkAction);

    //Condense the set of candidates into a set of targets.
    int refinementCandidatesCount = 0;
    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        refinementCandidatesCount += RefinementCandidates[i].Count;
    }
    Tree.GetRefineTuning(frameIndex, refinementCandidatesCount, refineAggressivenessScale, RefitCostChange, threadDispatcher.ThreadCount,
        out var targetRefinementCount, out var period, out var offset);
    QuickList<int, Buffer<int>>.Create(tree.Pool.SpecializeFor<int>(), targetRefinementCount, out RefinementTargets);

    //Note that only a subset of all refinement *candidates* will become refinement *targets*.
    //We start at a semirandom offset and then skip through the set to accumulate targets.
    //The number of candidates that become targets is based on the refinement aggressiveness,
    //tuned by both user input (the scale) and on the volatility of the tree (RefitCostChange).
    var currentCandidatesIndex = 0;
    int index = offset;
    for (int i = 0; i < targetRefinementCount - 1; ++i)
    {
        index += period;
        //Wrap around if the index doesn't fit.
        while (index >= RefinementCandidates[currentCandidatesIndex].Count)
        {
            index -= RefinementCandidates[currentCandidatesIndex].Count;
            ++currentCandidatesIndex;
            if (currentCandidatesIndex >= threadDispatcher.ThreadCount)
            {
                currentCandidatesIndex -= threadDispatcher.ThreadCount;
            }
        }
        Debug.Assert(index < RefinementCandidates[currentCandidatesIndex].Count && index >= 0);
        var nodeIndex = RefinementCandidates[currentCandidatesIndex][index];
        RefinementTargets.AddUnsafely(nodeIndex);
        tree.nodes[nodeIndex].RefineFlag = 1;
    }
    //Note that the root node is only refined if it was not picked as a target earlier.
    if (tree.nodes->RefineFlag != 1)
    {
        RefinementTargets.AddUnsafely(0);
        tree.nodes->RefineFlag = 1;
    }

    RefineIndex = -1;
    threadDispatcher.DispatchWorkers(RefineAction);

    //To multithread this, give each worker a contiguous chunk of nodes. You want to do the biggest chunks possible to chain decent cache behavior as far as possible.
    //Note that more cache optimization is required with more threads, since spreading it out more slightly lessens its effectiveness.
    var cacheOptimizeCount = Tree.GetCacheOptimizeTuning(MaximumSubtrees, RefitCostChange,
        (Math.Max(1, threadDispatcher.ThreadCount * 0.25f)) * cacheOptimizeAggressivenessScale);
    var cacheOptimizationTasks = threadDispatcher.ThreadCount * 2;
    PerWorkerCacheOptimizeCount = cacheOptimizeCount / cacheOptimizationTasks;
    var startIndex = (int)(((long)frameIndex * PerWorkerCacheOptimizeCount) % Tree.nodeCount);
    QuickList<int, Buffer<int>>.Create(Tree.Pool.SpecializeFor<int>(), cacheOptimizationTasks, out CacheOptimizeStarts);
    CacheOptimizeStarts.AddUnsafely(startIndex);

    var optimizationSpacing = Tree.nodeCount / threadDispatcher.ThreadCount;
    var optimizationSpacingWithExtra = optimizationSpacing + 1;
    var optimizationRemainder = Tree.nodeCount - optimizationSpacing * threadDispatcher.ThreadCount;
    for (int i = 1; i < cacheOptimizationTasks; ++i)
    {
        if (optimizationRemainder > 0)
        {
            startIndex += optimizationSpacingWithExtra;
            --optimizationRemainder;
        }
        else
        {
            startIndex += optimizationSpacing;
        }
        if (startIndex >= Tree.nodeCount)
        {
            startIndex -= Tree.nodeCount;
        }
        Debug.Assert(startIndex >= 0 && startIndex < Tree.nodeCount);
        CacheOptimizeStarts.AddUnsafely(startIndex);
    }

    threadDispatcher.DispatchWorkers(CacheOptimizeAction);

    for (int i = 0; i < threadDispatcher.ThreadCount; ++i)
    {
        //Note the use of the thread memory pool. Each thread allocated its own memory for the list since resizes were possible.
        RefinementCandidates[i].Dispose(threadDispatcher.GetThreadMemoryPool(i).SpecializeFor<int>());
    }
    Tree.Pool.SpecializeFor<QuickList<int, Buffer<int>>>().Return(ref RefinementCandidates);
    RefitNodes.Dispose(Tree.Pool.SpecializeFor<int>());
    RefinementTargets.Dispose(Tree.Pool.SpecializeFor<int>());
    CacheOptimizeStarts.Dispose(Tree.Pool.SpecializeFor<int>());
    Tree = null;
    this.threadDispatcher = null;
}
public static void Test()
{
    var pool = new BufferPool();
    var tree = new Tree(pool, 128);
    const int leafCountAlongXAxis = 11;
    const int leafCountAlongYAxis = 13;
    const int leafCountAlongZAxis = 15;
    var leafCount = leafCountAlongXAxis * leafCountAlongYAxis * leafCountAlongZAxis;
    pool.Take<BoundingBox>(leafCount, out var leafBounds);
    const float boundsSpan = 2;
    const float spanRange = 2;
    const float boundsSpacing = 3;
    var random = new Random(5);
    for (int i = 0; i < leafCountAlongXAxis; ++i)
    {
        for (int j = 0; j < leafCountAlongYAxis; ++j)
        {
            for (int k = 0; k < leafCountAlongZAxis; ++k)
            {
                var index = leafCountAlongXAxis * leafCountAlongYAxis * k + leafCountAlongXAxis * j + i;
                leafBounds[index].Min = new Vector3(i, j, k) * boundsSpacing;
                leafBounds[index].Max = leafBounds[index].Min + new Vector3(boundsSpan) +
                    spanRange * new Vector3((float)random.NextDouble(), (float)random.NextDouble(), (float)random.NextDouble());
            }
        }
    }

    var prebuiltCount = Math.Max(leafCount / 2, 1);
    tree.SweepBuild(pool, leafBounds.Slice(prebuiltCount));
    tree.Validate();
    for (int i = prebuiltCount; i < leafCount; ++i)
    {
        tree.Add(ref leafBounds[i], pool);
    }
    tree.Validate();

    pool.TakeAtLeast<int>(leafCount, out var handleToLeafIndex);
    pool.TakeAtLeast<int>(leafCount, out var leafIndexToHandle);
    for (int i = 0; i < leafCount; ++i)
    {
        handleToLeafIndex[i] = i;
        leafIndexToHandle[i] = i;
    }

    const int iterations = 100000;
    const int maximumChangesPerIteration = 20;
    var threadDispatcher = new SimpleThreadDispatcher(Environment.ProcessorCount);
    var refineContext = new Tree.RefitAndRefineMultithreadedContext();
    var selfTestContext = new Tree.MultithreadedSelfTest<OverlapHandler>(pool);
    var overlapHandlers = new OverlapHandler[threadDispatcher.ThreadCount];
    Action<int> pairTestAction = selfTestContext.PairTest;
    var removedLeafHandles = new QuickList<int>(leafCount, pool);
    for (int i = 0; i < iterations; ++i)
    {
        var changeCount = random.Next(maximumChangesPerIteration);
        for (int j = 0; j <= changeCount; ++j)
        {
            var addedFraction = tree.LeafCount / (float)leafCount;
            if (random.NextDouble() < addedFraction)
            {
                //Remove a leaf.
                var leafIndexToRemove = random.Next(tree.LeafCount);
                var handleToRemove = leafIndexToHandle[leafIndexToRemove];
                var movedLeafIndex = tree.RemoveAt(leafIndexToRemove);
                if (movedLeafIndex >= 0)
                {
                    var movedHandle = leafIndexToHandle[movedLeafIndex];
                    handleToLeafIndex[movedHandle] = leafIndexToRemove;
                    leafIndexToHandle[leafIndexToRemove] = movedHandle;
                    leafIndexToHandle[movedLeafIndex] = -1;
                }
                else
                {
                    //The removed leaf was the last one. This leaf index is no longer associated with any existing leaf.
                    leafIndexToHandle[leafIndexToRemove] = -1;
                }
                handleToLeafIndex[handleToRemove] = -1;
                removedLeafHandles.AddUnsafely(handleToRemove);
                tree.Validate();
            }
            else
            {
                //Add a leaf.
                var indexInRemovedList = random.Next(removedLeafHandles.Count);
                var handleToAdd = removedLeafHandles[indexInRemovedList];
                removedLeafHandles.FastRemoveAt(indexInRemovedList);
                var leafIndex = tree.Add(ref leafBounds[handleToAdd], pool);
                leafIndexToHandle[leafIndex] = handleToAdd;
                handleToLeafIndex[handleToAdd] = leafIndex;
                tree.Validate();
            }
        }

        tree.Refit();
        tree.Validate();
        tree.RefitAndRefine(pool, i);
        tree.Validate();
        var handler = new OverlapHandler();
        tree.GetSelfOverlaps(ref handler);
        tree.Validate();
        refineContext.RefitAndRefine(ref tree, pool, threadDispatcher, i);
        tree.Validate();
        for (int k = 0; k < threadDispatcher.ThreadCount; ++k)
        {
            overlapHandlers[k] = new OverlapHandler();
        }
        selfTestContext.PrepareJobs(ref tree, overlapHandlers, threadDispatcher.ThreadCount);
        threadDispatcher.DispatchWorkers(pairTestAction);
        selfTestContext.CompleteSelfTest();
        tree.Validate();
        if (i % 50 == 0)
        {
            Console.WriteLine($"Cost: {tree.MeasureCostMetric()}");
            Console.WriteLine($"Cache Quality: {tree.MeasureCacheQuality()}");
            Console.WriteLine($"Overlap Count: {handler.OverlapCount}");
        }
    }

    threadDispatcher.Dispose();
    pool.Clear();
}
public void ReturnUnsafely(int id)
{
    AvailableIds.AddUnsafely(id);
}
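ReturnUnsafely is one half of an id pool: returned ids go back into AvailableIds so they can be reused. The other half hands out ids from AvailableIds first and otherwise mints a new one. A sketch of that counterpart, assuming a nextIndex field tracking the highest id ever issued (not shown in the excerpt):

//Sketch of the matching take side of the id pool; nextIndex is an assumed field, not from the excerpt.
//Returned ids are reused before new ones are minted.
public int Take()
{
    if (AvailableIds.Count > 0)
    {
        var id = AvailableIds[AvailableIds.Count - 1];
        --AvailableIds.Count;
        return id;
    }
    return nextIndex++;
}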