/// <summary>
/// Produces, for every snake of the given state, an absolute score and that score's
/// advantage relative to the average score over all snakes.
/// </summary>
/// <param name="w">State to evaluate; <c>w.World.Snakes.Count</c> fixes the length of both result lists.</param>
/// <param name="c">Configuration whose <c>ReflexMask</c> marks, per snake index, the agents that receive a fixed score of 0.</param>
/// <param name="heuristic">Heuristic queried via <c>Score(w, index)</c> for every snake not marked in the reflex mask.</param>
/// <returns>
/// A tuple of (absolute scores, relative scores). Both lists are indexed by snake; the relative
/// score is the absolute score minus the average of all absolute scores.
/// </returns>
private static Tuple<List<float>, List<float>> ScoreAdvantage(CachedMultiMetricState w, InternalConfiguration c, IMultiHeuristic heuristic)
{
    int snakeCount = w.World.Snakes.Count;
    var absoluteScores = new List<float>(snakeCount);
    float total = 0.0f;

    for (int snake = 0; snake < snakeCount; ++snake)
    {
        if (c.ReflexMask[snake])
        {
            // Reflex agents are pinned to 0. Even when every opponent is a reflex agent this
            // keeps a non-trivial advantage for the simulated agent: comparing only against
            // simulated agents would make the advantage of a single simulated agent always 0.
            // A zero contributes nothing to the running total, so it is not added.
            absoluteScores.Add(0.0f);
            continue;
        }

        // Simulated agents are scored by the supplied heuristic.
        float heuristicScore = heuristic.Score(w, snake);
        Debug.Assert(!float.IsNaN(heuristicScore));
        absoluteScores.Add(heuristicScore);
        total += heuristicScore;
    }

    // The average is taken over all snakes, including the reflex agents fixed at 0.
    float mean = total / snakeCount;

    // Comparative advantage: distance of each absolute score from the average.
    var relativeScores = new List<float>(snakeCount);
    foreach (float absolute in absoluteScores)
    {
        relativeScores.Add(absolute - mean);
    }

    Debug.Assert(absoluteScores.All(candidate => !float.IsNaN(candidate)));
    return Tuple.Create(absoluteScores, relativeScores);
}
/// <summary>
/// Recursive search step. Within one draw, the snakes listed in <paramref name="steppingOrder"/>
/// choose their move one ply after another; once every listed snake has chosen, the collected
/// moves are applied to a clone of the world and the result is either scored or searched one
/// draw deeper. Each stepping snake keeps the move that maximizes its own relative score.
/// </summary>
/// <param name="w">World at the start of the current draw. It is not modified here; moves are applied to a clone.</param>
/// <param name="c">Search configuration: stop handler, depth limit, reflex mask and the diagnostic step counter.</param>
/// <param name="heuristic">Heuristic used for the terminal test and for scoring leaf states.</param>
/// <param name="currentDepth">Number of draws already played on this search path.</param>
/// <param name="currentPlyDepth">Index into <paramref name="steppingOrder"/> of the snake that chooses next.</param>
/// <param name="steppingOrder">Snake indices in the order in which they choose their move within a draw.</param>
/// <param name="moves">
/// Move list of the current draw, shared between the plys of that draw. It is ignored and replaced
/// by a fresh list when <paramref name="currentPlyDepth"/> is 0, so <c>null</c> may be passed there.
/// NOTE(review): with an empty <paramref name="steppingOrder"/> the leaf branch runs first and uses
/// this list as given, so it must not be <c>null</c> in that case.
/// </param>
/// <returns>
/// A tuple of (absolute scores, relative scores, moves). The scores come from the leaf reached by
/// the selected moves; the move list is a copy of the moves applied in the draw at
/// <paramref name="currentDepth"/> (moves of deeper draws are discarded).
/// </returns>
/// <exception cref="DeepeningSearch.StopSearchException">Thrown when the stop handler reports a stop request.</exception>
private static Tuple<List<float>, List<float>, List<Direction>> PseudoPlyStep(FastWorld w, InternalConfiguration c, IMultiHeuristic heuristic, int currentDepth, int currentPlyDepth, List<int> steppingOrder, List<Direction> moves)
{
    // Abort as soon as a stop has been requested; this is checked on every call.
    if (c.StopHandler.StopRequested)
    {
        throw new DeepeningSearch.StopSearchException();
    }

    if (currentPlyDepth == steppingOrder.Count)
    {
        // Increase diagnostic step counter
        c.steps++;

        // All plys played, perform the move on a copy of the world.
        // A direct cast (instead of `as`) makes an unexpected clone type fail right here
        // rather than as a NullReferenceException on the next line.
        var newW = (FastWorld)w.Clone();
        newW.UpdateMovementTick(moves);
        var cachedMetricState = new CachedMultiMetricState(newW);
        int nextDepth = currentDepth + 1;

        // Snapshot the moves of this draw: the shared list keeps being overwritten by sibling plys.
        var oldMoves = new List<Direction>(moves);

        List<float> phiAbs, phiRel;
        if (nextDepth == c.Depth || heuristic.IsTerminal(cachedMetricState))
        {
            // Depth limit reached or terminal state: score the leaf.
            (phiAbs, phiRel) = ScoreAdvantage(cachedMetricState, c, heuristic);
        }
        else
        {
            // Start the next draw at ply 0; it builds its own move list, hence null.
            (phiAbs, phiRel, _) = PseudoPlyStep(newW, c, heuristic, nextDepth, 0, steppingOrder, null);
        }

        return Tuple.Create(phiAbs, phiRel, oldMoves);
    }

    if (currentPlyDepth == 0)
    {
        // First to play on this draw is responsible for setting up the move cache by definition.
        // A new list is required so parent plys are not affected (and because no parent move
        // list is received at ply 0).
        moves = new List<Direction>(w.Snakes.Count);
        for (int i = 0; i < w.Snakes.Count; ++i)
        {
            if (w.Snakes[i].Alive && c.ReflexMask[i])
            {
                // Living reflex agents get their move from the reflex evade routine.
                moves.Add(Util.ImprovedReflexBasedEvade(w, i));
            }
            else
            {
                // Placeholder; snakes in the stepping order overwrite it when they take their ply.
                moves.Add(Direction.North);
            }
        }
    }

    int ownIndex = steppingOrder[currentPlyDepth];
    float alpha = float.NegativeInfinity;
    List<float> phiRelMax = null;
    List<float> phiAbsMax = null;
    List<Direction> dirMax = null;

    for (int i = 0; i < Util.Directions.Length; ++i)
    {
        var direction = Util.Directions[i];

        // Must always evaluate at least one move, even if it is known to be deadly,
        // to have correct heuristics bounds: the last direction is forced when nothing
        // has been selected so far.
        bool mustEvaluate = phiRelMax == null && i == Util.Directions.Length - 1;

        // Skip certainly deadly moves if we do not have to evaluate
        if (!mustEvaluate && w.CertainlyDeadly(ownIndex, direction))
        {
            continue;
        }

        moves[ownIndex] = direction;
        var (phiStarAbs, phiStarRel, directions) = PseudoPlyStep(w, c, heuristic, currentDepth, currentPlyDepth + 1, steppingOrder, moves);

        // Each snake maximizes its own relative score.
        var phiStarP = phiStarRel[ownIndex];
        if (alpha < phiStarP)
        {
            alpha = phiStarP;
            phiRelMax = phiStarRel;
            phiAbsMax = phiStarAbs;
            dirMax = directions;
        }

        Debug.Assert(alpha != float.NegativeInfinity);
    }

    Trace.Assert(phiRelMax != null);
    Trace.Assert(dirMax != null);
    return Tuple.Create(phiAbsMax, phiRelMax, dirMax);
}