예제 #1
0
        /// <summary>
        /// Epsilon-greedy move selection: when a random draw is at most <c>epsilon</c>
        /// the state's own random move is returned; otherwise every available move is
        /// tried on a clone of <paramref name="gameState"/> and one of the moves with
        /// the highest <c>GetResult()</c> is returned, ties broken uniformly at random.
        /// </summary>
        /// <param name="gameState">State to choose a move for. Only clones are modified here.</param>
        /// <returns>
        /// The chosen move. When the greedy evaluation yields no candidate (for example
        /// the state offers no moves), the value of <c>gameState.GetRandomMove()</c> is
        /// returned instead.
        /// </returns>
        public IPuzzleMove selectMove(IPuzzleState gameState)
        {
            // Exploration branch: delegate to the state's random move.
            if (rng.NextDouble() <= epsilon)
            {
                return(gameState.GetRandomMove());
            }

            // Exploitation branch: score each move one ply ahead.
            double             maxReward = double.MinValue;
            List <IPuzzleMove> bestMoves = new List <IPuzzleMove>();

            // The move list is taken from a clone that is never mutated, so applying
            // a move cannot invalidate the list being enumerated.
            foreach (IPuzzleMove move in gameState.Clone().GetMoves())
            {
                IPuzzleState clone = gameState.Clone();
                clone.DoMove(move);
                double result = clone.GetResult();

                if (result > maxReward)
                {
                    // Strictly better: restart the candidate list with this move.
                    bestMoves.Clear();
                    bestMoves.Add(move);
                    maxReward = result;
                }
                else if (result == maxReward)
                {
                    // Exact tie with the current best: keep it as an alternative.
                    bestMoves.Add(move);
                }
            }

            // No candidate was collected (no moves, or no comparable result).
            // Indexing an empty list would throw, so fall back to the same call the
            // exploration branch uses.
            // NOTE(review): what GetRandomMove() returns for a state without moves is
            // not visible here - confirm it matches the caller's "no move" convention.
            if (bestMoves.Count == 0)
            {
                return(gameState.GetRandomMove());
            }

            return(bestMoves[rng.Next(bestMoves.Count)]);
        }
        /// <summary>
        /// Runs a single-player tree search from <paramref name="rootState"/>: each
        /// iteration walks down the tree (select), tries to add one child (expand),
        /// plays the state out with simulation moves (rollout) and pushes the result
        /// back up to the root (backpropagate). The best-scoring rollout seen so far is
        /// kept in <c>bestRollout</c> / <c>topScore</c>; neither is reset by this method.
        /// </summary>
        /// <param name="rootState">State the search starts from; it is cloned at the start of every iteration.</param>
        /// <param name="iterations">Maximum number of iterations to execute.</param>
        /// <param name="maxTimeInMinutes">
        /// NOTE(review): this parameter is not read anywhere in the method, so no time
        /// limit is currently enforced.
        /// </param>
        /// <returns>
        /// The first move of <c>bestRollout</c> (which is removed from the rollout) when
        /// a non-empty rollout is stored, otherwise <c>rootNode.GetBestMove()</c>.
        /// Returns <c>null</c> when the <c>search</c> flag is found cleared at the end
        /// of an iteration.
        /// </returns>
        public IPuzzleMove Search(IPuzzleState rootState, int iterations, double maxTimeInMinutes = 5)
        {
            // Reset the per-search statistics.
            IterationsForFirstSolution = -1;
            nodeCount        = 0;
            nodesEliminated  = 0;
            nodesNotExpanded = 0;

            if (!search)
            {
                search = true;
            }
            double minReward = double.MaxValue;
            double maxReward = double.MinValue;

            maxDepth = 0;
            // If needed clean the pool, restore all objects in the pool to the initial value
            if (objectPool.NeedToClean)
            {
                objectPool.CleanObjectPool();
            }

            ISPTreeNode rootNode = treeCreator.GenRootNode(rootState);
            // Head and tail of the LRU queue used by the node recycling optimization.
            ISPTreeNode head     = null;
            ISPTreeNode tail     = null;

            // Moves played in the current iteration; handed to node.Update during backpropagation.
            HashSet <IPuzzleMove> allFirstMoves = new HashSet <IPuzzleMove>();

            solutionHashes = new HashSet <int>();

#if PROFILING
            long beforeMemory = GC.GetTotalMemory(false);
            long afterMemory  = GC.GetTotalMemory(false);
            long usedMemory   = afterMemory - beforeMemory;
            long averageUsedMemoryPerIteration = 0;
#endif
            // Diagnostic counter only: incremented below but never read by this method.
            int deadlocksInTree = 0;
            int currentDepth    = 0;
            for (iterationsExecuted = 0; iterationsExecuted < iterations; iterationsExecuted++)
            {
                // Set when a move leads back to an already visited state and cycles are not avoided.
                bool         looped = false;
                ISPTreeNode  node   = rootNode;
                IPuzzleState state  = rootState.Clone();
                //Debug.WriteLine(node.TreeToString(0));
                // States seen during this iteration, used to detect cycles.
                HashSet <IPuzzleState> visitedStatesInRollout = new HashSet <IPuzzleState>()
                {
                    state.Clone()
                };

                // Clear lists of moves used for RAVE updates && best rollout
                solutionHash = 27;
                List <IPuzzleMove> currentRollout = new List <IPuzzleMove>();
                allFirstMoves.Clear();

                // Select
                while (!node.HasMovesToTry() && node.HasChildren())
                {
                    // UCB1-Tuned and RAVE Optimizations
                    node = node.SelectChild();
                    state.DoMove(node.Move);
                    visitedStatesInRollout.Add(state.Clone());
                    // RAVE Optimization && best rollout
                    currentRollout.Add(node.Move);
                    UpdateSolutionHash(node.Move);
                    allFirstMoves.Add(node.Move);

                    // Node Recycling Optimization
                    if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
                    {
                        // Non-leaf node removed from LRU queue during playout
                        if (node.NextLRUElem != null && node.PrevLRUElem != null)
                        {
                            LRUQueueManager.LRURemoveElement(ref node, ref head, ref tail);
                        }
                    }
                }
                // Snapshot of the state at the selected node, used to retry other moves.
                IPuzzleState backupState = state.Clone();

                // The selected node has neither children nor untried moves.
                if (!node.HasChildren() && !node.HasMovesToTry())
                {
                    deadlocksInTree++;
                }

                // Expand
                if (node.HasMovesToTry())
                {
                    IPuzzleMove move = node.SelectUntriedMove();
                    // NOTE(review): the comparison with -1 relies on an operator or
                    // conversion for IPuzzleMove that is defined outside this view.
                    if (move != -1)
                    {
                        state.DoMove(move);

                        // Node Recycling Optimization
                        if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
                        {
                            // Memory budget reached: recycle a child of the queue head to make room.
                            if (memoryBudget == nodeCount && head != null)
                            {
                                head.ChildRecycle();
                                nodeCount--;
                                // Change LRU queue head when it becomes a leaf node
                                if (!head.HasChildren())
                                {
                                    LRUQueueManager.LRURemoveFirst(ref head, ref tail);
                                }
                            }
                        }

                        if (visitedStatesInRollout.Contains(state))
                        {
                            // The chosen move leads back to a state already seen in this iteration.
                            if (avoidCycles)
                            {
                                // Draw random untried moves until one reaches an unvisited
                                // state or the node has no untried moves left.
                                while (node.GetUntriedMoves().Count > 0 && visitedStatesInRollout.Contains(state))
                                {
                                    state = backupState.Clone();
                                    move  = node.GetUntriedMoves()[RNG.Next(node.GetUntriedMoves().Count)];
                                    state.DoMove(move);
                                    node.RemoveUntriedMove(move);
                                }
                                if (!visitedStatesInRollout.Contains(state)) //found valid move
                                {
                                    node = node.AddChild(objectPool, move, state);
                                    UpdateSolutionHash(move);
                                    currentRollout.Add(move);
                                    allFirstMoves.Add(move);
                                    nodeCount++;
                                }
                                else //all moves visited
                                {
                                    nodesNotExpanded++;
                                    state = backupState;
                                }
                            }
                            else
                            {
                                nodesNotExpanded++;
                                looped = true;
                            }
                        }
                        else
                        {
                            node = node.AddChild(objectPool, move, state);
                            // RAVE Optimization && best rollout
                            UpdateSolutionHash(move);
                            currentRollout.Add(move);
                            allFirstMoves.Add(move);
                            nodeCount++;
                        }
                        visitedStatesInRollout.Add(state.Clone());
                    }
                    else
                    {
                        state.Pass();
                    }
                }
                else
                {
                    nodesNotExpanded++;
                }


                // Rollout
                while (!state.isTerminal() && !looped)
                {
                    var move = state.GetSimulationMove();
                    backupState = state.Clone();
                    if (move != -1)
                    {
                        state.DoMove(move);
                        if (visitedStatesInRollout.Contains(state))
                        {
                            if (avoidCycles)
                            {
                                state = backupState.Clone();
                                List <IPuzzleMove> availableMoves = state.GetMoves();
                                while (availableMoves.Count > 0 && visitedStatesInRollout.Contains(state))
                                { //keep trying different moves until we end up in an unvisited state
                                    state = backupState.Clone();
                                    move  = availableMoves[RNG.Next(availableMoves.Count)];
                                    availableMoves.Remove(move);
                                    state.DoMove(move);
                                }
                                if (availableMoves.Count == 0 && visitedStatesInRollout.Contains(state))//all states have already been visited
                                {
                                    break;
                                }
                            }
                            else
                            {
                                looped = true;
                            }
                        }
                        // RAVE Optimization && best rollout
                        UpdateSolutionHash(move);
                        currentRollout.Add(move);
                        allFirstMoves.Add(move);
                        visitedStatesInRollout.Add(state.Clone());
                    }
                    else //simulation ended
                    {
                        break;
                        //state.Pass();
                    }
                }

                //Keep topScore and update bestRollout
                double result = state.GetResult();
                minReward = Math.Min(result, minReward);
                maxReward = Math.Max(result, maxReward);
                // Count each distinct solution (identified by its move-sequence hash) once.
                if (state.EndState() && !solutionHashes.Contains(solutionHash))
                {
                    solutionHashes.Add(solutionHash);
                    solutionCount++;
                    if (iterationsForFirstSolution < 0)
                    {
                        iterationsForFirstSolution = iterationsExecuted + 1;
                    }
                }

                // A rollout replaces the stored one when it scores higher, or scores the
                // same with fewer moves. bestRollout may still be null (it is null-checked
                // after the loop), so guard before reading its Count.
                bool improvesScore  = result > topScore;
                bool shorterAtEqual = result == topScore && (bestRollout == null || currentRollout.Count < bestRollout.Count);
                if (improvesScore || shorterAtEqual)
                {
                    topScore    = result;
                    bestRollout = currentRollout;
                    if (state.EndState() && stopOnResult)
                    {
                        // Count this iteration before leaving, since the loop increment is skipped by break.
                        iterationsExecuted++;
                        break;
                    }
                }


                // Backpropagate
                currentDepth = 0;
                while (node != null)
                {
                    //TODO penalize score for loops? (looped is not used during backpropagation yet)

                    // Captured first: the node may be detached from its parent below.
                    ISPTreeNode parent = node.Parent;
                    //if a node is a dead end remove it from the tree
                    if (!node.HasChildren() && !node.HasMovesToTry() && !state.EndState() && useNodeElimination)
                    {
                        if (node.Parent == null)//unsolvable level. The tree has been completely explored. Return current best score
                        {
                            //SinglePlayerMCTSMain.Log("Unsolvable Level");
                            //Console.WriteLine("\nUnsolvable Level");
                            break;
                        }
                        node.Parent.RemoveChild(node);
                        nodeCount--;
                        nodesEliminated++;
                        // Cancels the increment below so an eliminated node does not add to the depth.
                        currentDepth--;
                    }

                    // RAVE Optimization
                    node.Update(result, allFirstMoves);
                    node = parent;
                    currentDepth++;
                    // Node Recycling Optimization
                    if (((Opt_SP_UCTTreeNode)rootNode).NodeRecycling)
                    {
                        // Non-leaf node pushed back to LRU queue when updated
                        if (node != rootNode && node != null && node.HasChildren())
                        {
                            LRUQueueManager.LRUAddLast(ref node, ref head, ref tail);
                        }
                    }
                }

                maxDepth = Math.Max(maxDepth, currentDepth);

                // The whole tree has been explored: nothing left to search.
                if (!rootNode.HasChildren() && !rootNode.HasMovesToTry())
                {
                    break;
                }

                // The search flag was cleared while searching: abort without a move.
                // NOTE(review): presumably cleared from outside to cancel the search - confirm.
                if (!search)
                {
                    search = true;
                    // Flag the pool for cleaning like every other exit path does.
                    // NOTE(review): assumes the pool is dirty after a partial search - confirm.
                    objectPool.NeedToClean = true;
                    return(null);
                }

                #if PROFILING
                afterMemory = GC.GetTotalMemory(false);
                usedMemory  = afterMemory - beforeMemory;
                // The loop counter is iterationsExecuted; the previous reference to an undeclared 'i' did not compile.
                averageUsedMemoryPerIteration = usedMemory / (iterationsExecuted + 1);

                var outStringToWrite = string.Format(" optMCTS search: {0:0.00}% [{1} of {2}] - Total used memory B(MB): {3}({4:N7}) - Average used memory per iteration B(MB): {5}({6:N7})\n",
                                                     (float)((iterationsExecuted + 1) * 100) / (float)iterations, iterationsExecuted + 1, iterations, usedMemory, usedMemory / 1024 / 1024, averageUsedMemoryPerIteration,
                                                     (float)averageUsedMemoryPerIteration / 1024 / 1024);
                    #if DEBUG
                if (showMemoryUsage)
                {
                    Console.Write(outStringToWrite);
                    Console.SetCursorPosition(0, Console.CursorTop);
                }
                    #endif
                #endif

                //Console.WriteLine(rootNode.TreeToString(0));
            }
            //Console.WriteLine();

            objectPool.NeedToClean = true;

            //#if DEBUG
            //    Console.WriteLine(rootNode.ChildrenToString());
            //    Console.WriteLine(rootNode.TreeToString(0));
            //#endif


            IPuzzleMove bestMove;
            if (bestRollout != null && bestRollout.Count > 0) //Remove first move from rollout so that if the topScore is not beaten we can just take the next move on the next search
            {
                bestMove = bestRollout[0];
                bestRollout.RemoveAt(0);
            }
            else
            {
                bestMove = rootNode.GetBestMove();
            }
            Debug.WriteLine(rootNode.TreeToString(0));
            Debug.WriteLine("Min Reward: " + minReward + " - Max Reward: " + maxReward);

            // Collect the visit counts of the tree and store them in ascending order.
            visits     = new List <int>();
            raveVisits = new List <int>();
            CountVisits((Opt_SP_UCTTreeNode)rootNode, visits, raveVisits);

            visits.Sort();
            raveVisits.Sort();
            //string visitsString = LogVisits((Opt_SP_UCTTreeNode) rootNode);
            //SinglePlayerMCTSMain.Log("Iterations: "+IterationsExecuted+" NodeCount: " + nodeCount+" "+visitsString);
            return(bestMove);
        }