/// <summary>
/// Finds and removes the open-set entry whose node has the lowest FCost, breaking
/// ties by the lowest hCost, and returns that node's index.
/// </summary>
/// <param name="openSet">Indices of the nodes currently open for expansion.</param>
/// <param name="nodesInfo">Per-node pathfinding cost and parent information.</param>
/// <returns>The node index with the best (FCost, hCost) pair in the open set.</returns>
private int PopLowestFCostNodeIndexFromOpenSet(NativeList<int> openSet, NativeArray<NodePathFindInfo> nodesInfo)
{
    int bestListPos = -1;
    int bestNodeIndex = -1;
    int bestFCost = int.MaxValue;
    int bestHCost = int.MaxValue;

    for (int pos = 0; pos < openSet.Length; ++pos)
    {
        int candidate = openSet[pos];
        NodePathFindInfo candidateInfo = nodesInfo[candidate];

        bool strictlyBetter = candidateInfo.FCost < bestFCost;
        bool tieBrokenByHCost = candidateInfo.FCost == bestFCost && candidateInfo.hCost < bestHCost;

        if (strictlyBetter || tieBrokenByHCost)
        {
            bestListPos = pos;
            bestNodeIndex = candidate;
            bestFCost = candidateInfo.FCost;
            bestHCost = candidateInfo.hCost;
        }
    }

    // Profiling note kept from the original author: RemoveAt vs RemoveAtSwapBack
    // showed an extremely slight difference in favour of RemoveAt in a
    // non-totally-deterministic stress test, which is surprising; RemoveAtSwapBack
    // is kept since element order in the open set does not matter.
    openSet.RemoveAtSwapBack(bestListPos);

    return bestNodeIndex;
}
/// <summary>
/// Runs one A* path request: searches from the request's start node to its end node
/// and, on success, stores the next node to move towards via
/// <see cref="SaveNextNodeIndexToMoveTo"/>. If the open set empties without reaching
/// the goal (path fully blocked), the method returns without saving anything.
/// </summary>
/// <param name="index">Iteration index within this job batch; the global request
/// index is jobIndexStride + index.</param>
public void Execute(int index)
{
    int globalIndex = jobIndexStride + index;
    int startNodeIndex = startIndices[globalIndex];
    int endNodeIndex = endIndices[globalIndex];

    // Note: the Temp allocator can fall back to a much slower path if its memory
    // block is exhausted; by the looks of the tests done, that memory is released
    // once the job finishes. This was originally an IJobParallelFor and some threads
    // introduced significant bottlenecks due to this issue (the inner loop batch
    // count didn't matter). After switching to a "batch of IJobs" the issue is gone,
    // as the maximum number of jobs running at the same time is just the number of
    // logical threads, which shouldn't be more than ~32 on a high end system
    // (although some Threadrippers can go up to 128, which may eventually be an
    // issue again). Tested on a completely flat map with no obstacles, per-job
    // completion times are reasonably similar.
    // Update: exhaustive testing with thousands of IJobs and simple paths shows that
    // approach is certainly much better than the one mentioned above, but it has a
    // considerably big issue: with tons of simple paths the dependencies become too
    // complex and the main thread struggles more and more as the number of paths
    // increases — the workers push hard to finish, but the main thread has to check
    // every single job handle when calling CompleteAll or combining dependencies
    // (speculation based on profiler observations).
    // Update2: ended up using IJobFor with very low iteration counts (8 seems to
    // work fine enough), a middle ground between the two approaches above. A decent
    // spot, but with lots of paths and very low work per path some paths take
    // extremely long to compute. From here, the only way to improve is
    // micro-optimization and some kind of hierarchical pathfinding. There are other
    // ways of computing pathfinding for large crowds, such as flow fields, but I'm
    // currently only interested in A*. If memory really is the problem, hierarchical
    // pathfinding should greatly increase performance, as constraining the searches
    // to e.g. a 16x16 grid would allow bytes for gCost/hCost and sub-indices; the
    // open and closed sets would also be extremely constrained and it may become
    // possible to use FixedList for some of the info, which may give huge speedups.

    // Per-node search state (gCost, hCost, parent). Swapping this for a plain int3
    // was tried and, surprisingly, it is substantially slower.
    NativeArray<NodePathFindInfo> nodesInfo = new NativeArray<NodePathFindInfo>(numNodes, Allocator.Temp);
    NativeBitArray closedSet = new NativeBitArray(numNodes, Allocator.Temp);
    NativeBitArray openSetContains = new NativeBitArray(numNodes, Allocator.Temp);

    // Warning: 272 is a magical number due to the map layout and possible paths,
    // which seems to not throw errors about being too low for the test scene. This
    // list should stay constrained to a low range to keep the algorithm performant;
    // otherwise it would be worth implementing a native binary heap to speed up the
    // extraction of the lowest-FCost node.
    NativeList<int> openSet = new NativeList<int>(272, Allocator.Temp);

    // Seed the search: the start node has gCost 0, its heuristic to the goal, and
    // no parent (-1).
    nodesInfo[startNodeIndex] = new NodePathFindInfo(0, GetHeuristic(startNodeIndex, endNodeIndex), -1);
    openSet.AddNoResize(startNodeIndex);

    while (openSet.Length > 0)
    {
        int currNodeIndex = PopLowestFCostNodeIndexFromOpenSet(openSet, nodesInfo);

        // We've reached the goal: persist the result for this request and stop.
        if (currNodeIndex == endNodeIndex)
        {
            SaveNextNodeIndexToMoveTo(globalIndex, nodesInfo);
            return;
        }

        // Add it to the closed set by setting the flag at its index.
        closedSet.SetBits(currNodeIndex, true, 1);
        NodePathFindInfo currNodeInfo = nodesInfo[currNodeIndex];

        // Go over the neighbors, stored flat with numNeighbors slots per node.
        int start = currNodeIndex * numNeighbors;
        int end = start + numNeighbors;

        for (int i = start; i < end; ++i)
        {
            int neighborIndex = nodesNeighbors[i].neighborIndex;

            // Skip if the slot holds no neighbor, the neighbor was already
            // expanded, or it can't be walked on (non-zero node type).
            if (!nodesNeighbors[i].isValid || closedSet.IsSet(neighborIndex) || (byte)nodesTypes[neighborIndex] > 0)
            {
                continue;
            }

            NodePathFindInfo neighborNodeInfo = nodesInfo[neighborIndex];
            int newGCost = currNodeInfo.gCost + GetHeuristic(currNodeIndex, neighborIndex);

            if (!openSetContains.IsSet(neighborIndex))
            {
                // Not in the open set: set parent and costs, then add it.
                neighborNodeInfo.gCost = newGCost;
                neighborNodeInfo.hCost = GetHeuristic(neighborIndex, endNodeIndex);
                neighborNodeInfo.parentNodeIndex = currNodeIndex;
                nodesInfo[neighborIndex] = neighborNodeInfo;
                openSet.AddNoResize(neighborIndex);

                // Fix: use the (pos, value, numBits) overload, consistent with the
                // closedSet call above — the previous two-argument call did not
                // match NativeBitArray.SetBits' signatures.
                openSetContains.SetBits(neighborIndex, true, 1);
            }
            else if (newGCost < neighborNodeInfo.gCost)
            {
                // Cheaper route found: update parent and gCost (hCost is already
                // calculated and unchanged).
                neighborNodeInfo.gCost = newGCost;
                neighborNodeInfo.parentNodeIndex = currNodeIndex;
                nodesInfo[neighborIndex] = neighborNodeInfo;
            }
        }
    }

    // Note: TODO? The only way to get here should be a completely blocked route to a
    // valid end node; a fallback behavior should be decided for that case.
    return;
}