Example #1
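        // Recursive pseudo-ply step: every fully simulated snake in steppingOrder picks,
        // in turn, the move that maximizes its own relative advantage; once all plies of
        // the current draw are fixed, the world is advanced one tick and the search
        // recurses until the configured depth or a terminal state is reached.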
        private static Tuple <List <float>, List <float>, List <Direction> > PseudoPlyStep(FastWorld w, InternalConfiguration c, IMultiHeuristic heuristic, int currentDepth, int currentPlyDepth, List <int> steppingOrder, List <Direction> moves)
        {
            // Check on every step whether the search was asked to stop, and abort immediately if so
            if (c.StopHandler.StopRequested)
            {
                throw new DeepeningSearch.StopSearchException();
            }

            if (currentPlyDepth == steppingOrder.Count)
            {
                // Increase diagnostic step counter
                c.steps++;

                // All plies of this draw have been played; apply the moves and advance the world one tick
                var newW = w.Clone() as FastWorld;
                newW.UpdateMovementTick(moves);

                var cachedMetricState = new CachedMultiMetricState(newW);

                int nextDepth = currentDepth + 1;

                var          oldMoves = new List <Direction>(moves);
                List <float> phiAbs, phiRel;

                if (nextDepth == c.Depth || heuristic.IsTerminal(cachedMetricState))
                {
                    (phiAbs, phiRel) = ScoreAdvantage(cachedMetricState, c, heuristic);
                }
                else
                {
                    (phiAbs, phiRel, _) = PseudoPlyStep(newW, c, heuristic, nextDepth, 0, steppingOrder, null);
                }

                return(Tuple.Create(phiAbs, phiRel, oldMoves));
            }
            else if (currentPlyDepth == 0)
            {
                // The first snake to play on this draw is, by definition, responsible for setting up the move cache
                // A fresh move list must be created so the parent plies' list is not modified (and because, by definition, the parent's move list was not passed down)
                moves = new List <Direction>(w.Snakes.Count);

                for (int i = 0; i < w.Snakes.Count; ++i)
                {
                    if (w.Snakes[i].Alive && c.ReflexMask[i])
                    {
                        moves.Add(Util.ImprovedReflexBasedEvade(w, i));
                    }
                    else
                    {
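                        // Placeholder move; fully simulated snakes overwrite it at their own ply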
                        moves.Add(Direction.North);
                    }
                }
            }

            // Snake whose move is chosen at this ply: it maximizes its own relative
            // advantage over all moves that are not certainly deadly
            int ownIndex = steppingOrder[currentPlyDepth];

            float            alpha     = float.NegativeInfinity;
            List <float>     phiRelMax = null;
            List <float>     phiAbsMax = null;
            List <Direction> dirMax    = null;

            for (int i = 0; i < Util.Directions.Length; ++i)
            {
                // Must always evaluate at least one move, even if it is known to be deadly,
                // to keep the heuristic bounds correct
                bool mustEvaluate = phiRelMax == null && i == Util.Directions.Length - 1;

                // Skip certainly deadly moves if we do not have to evaluate
                if (!mustEvaluate && w.CertainlyDeadly(ownIndex, Util.Directions[i]))
                {
                    continue;
                }

                moves[ownIndex] = Util.Directions[i];

                var(phiStarAbs, phiStarRel, directions) = PseudoPlyStep(w, c, heuristic, currentDepth, currentPlyDepth + 1, steppingOrder, moves);
                var phiStarP = phiStarRel[ownIndex];

                if (alpha < phiStarP)
                {
                    alpha     = phiStarP;
                    phiRelMax = phiStarRel;
                    phiAbsMax = phiStarAbs;
                    dirMax    = directions;
                }

                Debug.Assert(alpha != float.NegativeInfinity);
            }

            Trace.Assert(phiRelMax != null);
            Trace.Assert(dirMax != null);

            return(Tuple.Create(phiAbsMax, phiRelMax, dirMax));
        }
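
A hypothetical root invocation of PseudoPlyStep might look like the sketch below. It is only an illustration: the driver code and the variables world, config, heuristic and ourIndex are assumptions, as is the construction of steppingOrder from the indices of the fully simulated (non-reflex) snakes.

        // Sketch of an assumed caller (not part of the example above):
        // build the stepping order from the fully simulated snakes ...
        var steppingOrder = Enumerable.Range(0, world.Snakes.Count)
                            .Where(i => !config.ReflexMask[i])
                            .ToList();

        // ... and start the search at depth 0, ply 0, with no parent move list.
        // The returned directions form the best first draw found; pick our own entry.
        var(phiAbs, phiRel, bestMoves) = PseudoPlyStep(world, config, heuristic, 0, 0, steppingOrder, null);
        Direction ourMove = bestMoves[ourIndex];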
Example #2
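        // Scores every snake with the provided heuristic (reflex-based agents score 0)
        // and returns both the absolute scores and each snake's advantage relative to
        // the average over all snakes.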
        private static Tuple <List <float>, List <float> > ScoreAdvantage(CachedMultiMetricState w, InternalConfiguration c, IMultiHeuristic heuristic)
        {
            var count = w.World.Snakes.Count;

            var phiAbs = new List <float>(count);

            float sum = 0.0f;

            for (int i = 0; i < count; ++i)
            {
                if (c.ReflexMask[i])
                {
                    // Reflex-based agents always contribute a score of 0.
                    // This keeps the advantage meaningful even when simulating against only
                    // reflex-based agents: if the average were taken over the fully simulated
                    // agents alone, a single simulated agent's advantage would always be 0.
                    phiAbs.Add(0.0f);
                    // No need to add 0 to the sum
                }
                else
                {
                    // Fully expanded agents estimate their score by the provided heuristic
                    float val = heuristic.Score(w, i);

                    Debug.Assert(!float.IsNaN(val));

                    phiAbs.Add(val);
                    sum += val;
                }
            }

            // Take the average of all scores as the baseline for relative advantage
            float avg = sum / count;

            var phiRel = new List <float>(count);

            // Subtract average from all heuristic scores to get comparative advantage
            for (int i = 0; i < count; ++i)
            {
                phiRel.Add(phiAbs[i] - avg);
            }

            Debug.Assert(!phiAbs.Any(f => float.IsNaN(f)));

            return(Tuple.Create(phiAbs, phiRel));
        }
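
As an illustrative example (not taken from the source): with three snakes where snake 2 is reflex-controlled and the heuristic scores snakes 0 and 1 as 4.0 and 1.0, phiAbs is (4.0, 1.0, 0.0), the average is 5/3 ≈ 1.67, and phiRel is roughly (2.33, -0.67, -1.67).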