/// <summary>
/// Rates one candidate move of the snake at <paramref name="ownIndex"/> as a weighted sum
/// of simple one-step features. Larger values are better.
/// </summary>
/// <param name="w">World state in which the move is evaluated.</param>
/// <param name="ownIndex">Index into <c>w.Snakes</c> of the snake being moved.</param>
/// <param name="move">Candidate direction for that snake.</param>
/// <returns>
/// <c>-1000</c> when <c>w.CertainlyDeadly</c> flags the move; otherwise
/// <c>1 * hold + 3 * fruit + 10 * potentialCollision + 30 * enemyCollision</c>.
/// </returns>
public static float Score(FastWorld w, int ownIndex, Direction move)
{
    const float HoldWeight = 1.0f;
    const float FruitWeight = 3.0f;
    const float PotentialCollisionWeight = 10.0f;
    const float EnemyCollisionWeight = 30.0f;

    // Moves that are known to be deadly get the fixed minimum right away.
    if (w.CertainlyDeadly(ownIndex, move))
    {
        return -1000.0f;
    }

    var target = w.Snakes[ownIndex].Head.Advanced(move);
    var targetOccupant = w[target].occupant;

    // A snake on the target cell is always another snake here, because
    // CertainlyDeadly already covers running into ourselves. Punish badly.
    float enemyCollisionScore = targetOccupant == FastWorld.Occupant.Snake ? -1.0f : 0.0f;

    // Reward stepping on fruit.
    float fruitScore = targetOccupant == FastWorld.Occupant.Fruit ? 1.0f : 0.0f;

    // Punish (or mildly reward) ending up next to the head of another living snake,
    // since both heads could move onto the same cell on the following tick.
    float potentialCollisionScore = 0.0f;
    for (int other = 0; other < w.Snakes.Count; ++other)
    {
        if (other == ownIndex || !w.Snakes[other].Alive)
        {
            continue;
        }

        if (Coord.ManhattanDistance(w.Snakes[other].Head, target) != 1)
        {
            continue;
        }

        // TODO: the length difference does not take growing into account yet.
        var lengthDelta = w.Snakes[ownIndex].Length - w.Snakes[other].Length;

        // Equal length: potential tie, punished.
        // We are longer: rewarded, but by less than a tie is punished.
        // We are shorter: punished the most.
        potentialCollisionScore += lengthDelta == 0
            ? -0.5f
            : (lengthDelta > 0 ? 0.3f : -1.0f);
    }

    // Prefer to keep the direction of the previous move.
    float holdScore = 0.0f;
    if (move == w.Snakes[ownIndex].LastDirection)
    {
        holdScore = 1.0f;
    }

    return holdScore * HoldWeight
           + fruitScore * FruitWeight
           + potentialCollisionScore * PotentialCollisionWeight
           + enemyCollisionScore * EnemyCollisionWeight;
}
/// <summary>
/// One recursion step of the multi-snake search. The snakes listed in
/// <paramref name="steppingOrder"/> pick their move one after another (one "ply" each);
/// once all of them have picked, the world is cloned and advanced by one tick and the
/// search either scores the result or continues with the next depth.
/// </summary>
/// <param name="w">World state at the current depth. It is not modified; a clone is advanced.</param>
/// <param name="c">Search configuration (stop handler, step counter, depth limit, reflex mask).</param>
/// <param name="heuristic">Heuristic used for the terminal test and passed on to <c>ScoreAdvantage</c>.</param>
/// <param name="currentDepth">Number of ticks already simulated on this path.</param>
/// <param name="currentPlyDepth">Position inside <paramref name="steppingOrder"/> of the snake choosing now.</param>
/// <param name="steppingOrder">Snake indices in the order in which they choose their move.</param>
/// <param name="moves">
/// Shared per-snake move list for the current tick. It is ignored and replaced by a fresh list
/// when <paramref name="currentPlyDepth"/> is 0 (unless <paramref name="steppingOrder"/> is empty).
/// </param>
/// <returns>
/// The absolute scores, the relative scores and the move list of the continuation whose relative
/// score is largest for the snake choosing at this ply.
/// </returns>
/// <exception cref="DeepeningSearch.StopSearchException">A stop was requested via <c>c.StopHandler</c>.</exception>
private static Tuple<List<float>, List<float>, List<Direction>> PseudoPlyStep(
    FastWorld w,
    InternalConfiguration c,
    IMultiHeuristic heuristic,
    int currentDepth,
    int currentPlyDepth,
    List<int> steppingOrder,
    List<Direction> moves)
{
    // The stop flag is polled at the start of every call.
    if (c.StopHandler.StopRequested)
    {
        throw new DeepeningSearch.StopSearchException();
    }

    // Every snake in the stepping order has chosen: play the tick.
    if (currentPlyDepth == steppingOrder.Count)
    {
        // Diagnostic counter of simulated ticks.
        c.steps++;

        var advancedWorld = w.Clone() as FastWorld;
        advancedWorld.UpdateMovementTick(moves);
        var metricState = new CachedMultiMetricState(advancedWorld);
        int childDepth = currentDepth + 1;

        // Snapshot the moves, the shared list keeps being overwritten by the callers' loops.
        var playedMoves = new List<Direction>(moves);

        List<float> resultAbs, resultRel;
        bool isLeaf = childDepth == c.Depth || heuristic.IsTerminal(metricState);
        if (!isLeaf)
        {
            // Next tick starts at ply 0, which builds its own move list, hence null.
            (resultAbs, resultRel, _) = PseudoPlyStep(advancedWorld, c, heuristic, childDepth, 0, steppingOrder, null);
        }
        else
        {
            (resultAbs, resultRel) = ScoreAdvantage(metricState, c, heuristic);
        }

        return Tuple.Create(resultAbs, resultRel, playedMoves);
    }

    // The first snake to choose on a tick sets up the move list. A new list is required so
    // that the list of the parent tick is left untouched (and none was passed in anyway).
    if (currentPlyDepth == 0)
    {
        int snakeCount = w.Snakes.Count;
        moves = new List<Direction>(snakeCount);
        for (int snake = 0; snake < snakeCount; ++snake)
        {
            // Living snakes selected by the reflex mask get the reflex move as default,
            // everyone else a North placeholder.
            bool useReflex = w.Snakes[snake].Alive && c.ReflexMask[snake];
            moves.Add(useReflex ? Util.ImprovedReflexBasedEvade(w, snake) : Direction.North);
        }
    }

    int mover = steppingOrder[currentPlyDepth];
    int lastCandidate = Util.Directions.Length - 1;

    float bestValue = float.NegativeInfinity;
    List<float> bestAbs = null;
    List<float> bestRel = null;
    List<Direction> bestMoves = null;

    for (int d = 0; d <= lastCandidate; ++d)
    {
        var candidate = Util.Directions[d];

        // At least one move has to be evaluated, even a deadly one, so that the result
        // carries real heuristic values. The last candidate is forced if nothing was kept so far.
        bool forced = d == lastCandidate && bestRel == null;
        if (!forced && w.CertainlyDeadly(mover, candidate))
        {
            continue;
        }

        moves[mover] = candidate;
        var (childAbs, childRel, childMoves) = PseudoPlyStep(
            w, c, heuristic, currentDepth, currentPlyDepth + 1, steppingOrder, moves);

        // Each snake maximises its own relative score.
        float value = childRel[mover];
        if (value > bestValue)
        {
            bestValue = value;
            bestAbs = childAbs;
            bestRel = childRel;
            bestMoves = childMoves;
        }

        Debug.Assert(bestValue != float.NegativeInfinity);
    }

    Trace.Assert(bestRel != null);
    Trace.Assert(bestMoves != null);
    return Tuple.Create(bestAbs, bestRel, bestMoves);
}
/// <summary>
/// Two-player alpha-beta search: the own snake (<c>c.OwnIndex</c>) maximises and the enemy
/// snake (<c>c.EnemyIndex</c>) minimises the value of <c>c.Heuristic</c>. Both snakes move
/// in the same tick; all other snakes follow <c>c.UntargetedDecisionFunction</c>.
/// </summary>
/// <param name="c">Search configuration (heuristic, snake indices, decision function, stop flag).</param>
/// <param name="w">World state of this node. It is not modified; clones are advanced.</param>
/// <param name="maxDepth">Depth at which the heuristic is evaluated instead of searching further.</param>
/// <param name="currentDepth">Depth of this node.</param>
/// <param name="alpha">Best value the own snake is already guaranteed by the caller.</param>
/// <param name="betaInitial">Best (lowest) value the enemy is already guaranteed by the caller.</param>
/// <returns>
/// The best own direction found and its value. For terminal or depth-limited nodes this is
/// <c>Direction.North</c> together with the heuristic score of <paramref name="w"/>. If no own
/// move improves on the incoming <paramref name="alpha"/>, the direction stays
/// <c>Direction.North</c> and the incoming alpha is returned.
/// </returns>
/// <exception cref="StopSearchException">A stop was requested via <c>c.Stop</c>.</exception>
public static Tuple<Direction, float> BestWithHeuristic(
    Configuration c,
    FastWorld w,
    int maxDepth,
    int currentDepth,
    float alpha,
    float betaInitial)
{
    // Fail fast if terminal or depth exhausted
    bool terminal = c.Heuristic.IsTerminal(w);
    bool limitReached = currentDepth >= maxDepth;
    if (terminal || limitReached)
    {
        var score = c.Heuristic.Score(w);
        return Tuple.Create(Direction.North, score);
    }

    Direction bestOwnDirection = Direction.North;

    // Use the given decision function for all snakes first; the entries of the own and the
    // enemy snake are overwritten for every combination tried below.
    var desiredMoves = new List<Direction>(w.Snakes.Count);
    for (int i = 0; i < w.Snakes.Count; ++i)
    {
        desiredMoves.Add(c.UntargetedDecisionFunction(w, i));
    }

    // Initially have not checked any own moves. We must always check at least one
    // available action to allow the heuristic to evaluate one leaf, even if we know
    // it leads to death. Otherwise we would return the theoretical heuristic minimum (-Inf),
    // not the practical lower bound as implemented.
    bool checkedOwnMove = false;
    for (int i = 0; i < Util.Directions.Length; ++i)
    {
        // If this is the last available action and we have not evaluated any action yet,
        // it must be evaluated.
        bool mustEvaluateOwn = !checkedOwnMove && i == Util.Directions.Length - 1;

        // Skip guaranteed deadly moves immediately
        if (!mustEvaluateOwn && w.CertainlyDeadly(c.OwnIndex, Util.Directions[i]))
        {
            continue;
        }

        checkedOwnMove = true;

        // Must reset beta to the initial beta value of this node, otherwise we would use an
        // updated beta from a different sub tree.
        // Beta stores the best move possible for the opponent.
        var beta = betaInitial;

        // Same rule as for the own snake: at least one enemy reply has to be evaluated, even
        // a deadly one. Without it beta would stay at betaInitial and this own move would be
        // credited with the caller's bound instead of a real heuristic value.
        // (This flag used to start as true and was never updated, which disabled the rule.)
        bool checkedEnemyMove = false;
        for (int j = 0; j < Util.Directions.Length; ++j)
        {
            // Check stop request in inner loop
            if (c.Stop.StopRequested)
            {
                throw new StopSearchException();
            }

            bool mustEvaluateEnemy = !checkedEnemyMove && j == Util.Directions.Length - 1;

            // Skip guaranteed deadly moves immediately
            if (!mustEvaluateEnemy && w.CertainlyDeadly(c.EnemyIndex, Util.Directions[j]))
            {
                continue;
            }

            checkedEnemyMove = true;

            desiredMoves[c.OwnIndex] = Util.Directions[i];
            desiredMoves[c.EnemyIndex] = Util.Directions[j];

            var worldInstance = w.Clone() as FastWorld;
            worldInstance.UpdateMovementTick(desiredMoves);

            var tuple = BestWithHeuristic(c, worldInstance, maxDepth, currentDepth + 1, alpha, beta);
            if (tuple.Item2 < beta)
            {
                beta = tuple.Item2;
            }

            // If the best reply of the opponent is already no better for us than what we
            // have secured, stop; no need to find even worse replies.
            if (alpha >= beta)
            {
                // Alpha cut-off
                break;
            }
        }

        if (beta > alpha)
        {
            alpha = beta;
            bestOwnDirection = Util.Directions[i];
        }

        // If our best move is at least as good as what the opponent can already enforce
        // elsewhere, stop; no need to find even better moves.
        // Of course we have to compare to the initial beta value here.
        if (alpha >= betaInitial)
        {
            // Beta cut-off
            break;
        }
    }

    return Tuple.Create(bestOwnDirection, alpha);
}