Beispiel #1
0
 /// <summary>
 /// Creates a tree node for <paramref name="move"/> below <paramref name="parent"/> with all
 /// statistics reset and no children.
 /// </summary>
 /// <param name="move">Move stored on this node.</param>
 /// <param name="parent">Parent node stored on this node.</param>
 /// <param name="state">State queried for the untried moves; only read when <paramref name="generateUntriedMoves"/> is true.</param>
 /// <param name="rng">Random number generator stored on the node.</param>
 /// <param name="ucb1Tuned">Value stored in the node's ucb1Tuned flag.</param>
 /// <param name="rave">Value stored in the node's rave flag.</param>
 /// <param name="raveThreshold">Value stored as the node's RAVE threshold.</param>
 /// <param name="nodeRecycling">Value stored in the node's nodeRecycling flag.</param>
 /// <param name="const_C">Value stored as the node's const_C.</param>
 /// <param name="const_D">Value stored as the node's const_D.</param>
 /// <param name="generateUntriedMoves">When false, the untried-move list is left unassigned by this constructor.</param>
 public Opt_SP_UCTTreeNode(IPuzzleMove move, Opt_SP_UCTTreeNode parent, IPuzzleState state, MersenneTwister rng, bool ucb1Tuned, bool rave, double raveThreshold, bool nodeRecycling, double const_C = 1, double const_D = 20000, bool generateUntriedMoves = true)
 {
     // Position in the tree and in the LRU queue; a fresh node is inactive and unlinked.
     Move = move;
     this.parent = parent;
     childNodes = new List<Opt_SP_UCTTreeNode>();
     NextLRUElem = null;
     PrevLRUElem = null;
     SetActive = false;

     // Search configuration handed in by the caller.
     rnd = rng;
     this.const_C = const_C;
     this.const_D = const_D;
     this.ucb1Tuned = ucb1Tuned;
     this.rave = rave;
     this.raveThreshold = raveThreshold;
     this.nodeRecycling = nodeRecycling;

     // Statistics start from zero; topScore starts at the lowest possible value.
     wins = 0;
     visits = 0;
     squares_rewards = 0;
     squaredReward = 0;
     RAVEwins = 0;
     RAVEvisits = 0;
     topScore = double.MinValue;

     if (!generateUntriedMoves)
     {
         return;
     }

     untriedMoves = state.GetMoves();
 }
Beispiel #2
0
        /// <summary>
        /// Picks a random simulation move, preferring moves whose board cell does not hold
        /// <c>selectedColor</c>.
        /// </summary>
        /// <remarks>
        /// With probability 0.00007 the filter is skipped and a random legal move is returned.
        /// When every legal move targets a cell of <c>selectedColor</c>, the choice is made among
        /// all legal moves instead.
        /// NOTE(review): the state is expected to offer at least one move; an empty move list
        /// is not handled here, matching the previous behaviour.
        /// </remarks>
        /// <param name="gameState">State whose legal moves are sampled.</param>
        /// <returns>The selected move.</returns>
        public IPuzzleMove selectMove(IPuzzleState gameState)
        {
            // Probability of ignoring the colour filter (epsilon greedy).
            const double randomMoveProbability = 0.00007;

            List <IPuzzleMove> moves = gameState.GetMoves();

            if (rnd.NextDouble() <= randomMoveProbability)
            {
                return(moves[rnd.Next(moves.Count)]);
            }

            // Filter into a separate list: the list returned by GetMoves() is left untouched and
            // can be reused as the fallback without generating the moves a second time.
            List <IPuzzleMove> preferredMoves = moves.FindAll(item => gameState.GetBoard(SamegameGameMove.GetX(item), SamegameGameMove.GetY(item)) != selectedColor);
            List <IPuzzleMove> candidates     = preferredMoves.Count > 0 ? preferredMoves : moves;

            return(candidates[rnd.Next(candidates.Count)]);
        }
 /// <summary>
 /// Selects a move using the IDA* solver, with an epsilon chance of a random legal move.
 /// </summary>
 /// <remarks>
 /// The solver result is stored in <c>moveList</c> on every non-random call.
 /// </remarks>
 /// <param name="gameState">State to select a move for.</param>
 /// <returns>
 /// A random legal move (with probability <c>epsilon</c>, when at least one exists), otherwise the
 /// first move of the solver's result, or <c>(IPuzzleMove)(-1)</c> when the solver returns no moves.
 /// </returns>
 public IPuzzleMove selectMove(IPuzzleState gameState)
 {
     if (RNG.NextDouble() < epsilon)
     {
         // Fetch the move list once; with no legal moves fall through to the solver path,
         // which reports the "no move" sentinel instead of indexing an empty list.
         var moves = gameState.GetMoves();
         if (moves.Count > 0)
         {
             return(moves[RNG.Next(moves.Count)]);
         }
     }
     moveList = idaStar.Solve(gameState, maxNodes, tableSize, maxDepth);
     if (moveList.Count > 0)
     {
         return(moveList[0]);
     }
     else
     {
         return((IPuzzleMove)(-1));
     }
 }
        /// <summary>
        /// Hands out a node from the pool, configured with the supplied arguments and flagged active.
        /// The first pooled node whose <c>SetActive</c> flag is false is reused; only the members assigned
        /// below are overwritten on it, anything else stored on the node is left as it was.
        /// When no inactive node exists and the pool is still below <c>maxPoolSize</c>, a new node is
        /// constructed and added to the pool.
        /// The caller has to flag the node inactive again when it is done with it.
        /// </summary>
        /// <returns>The reused or newly created node, or null when the pool is full and every node is active.</returns>
        public Opt_SP_UCTTreeNode GetObject(IPuzzleMove move, Opt_SP_UCTTreeNode parent, IPuzzleState state, MersenneTwister rng, bool ucb1Tuned, bool rave, double raveThreshold, bool nodeRecycling, double const_C, double const_D)
        {
            // First choice: recycle the first node that is not in use.
            foreach (Opt_SP_UCTTreeNode candidate in pooledObjects)
            {
                if (!candidate.SetActive)
                {
                    candidate.SetActive = true;

                    candidate.Move          = move;
                    candidate.Parent        = parent;
                    candidate.untriedMoves  = state.GetMoves();
                    candidate.Rnd           = rng;
                    candidate.Ucb1Tuned     = ucb1Tuned;
                    candidate.Rave          = rave;
                    candidate.RaveThreshold = raveThreshold;
                    candidate.NodeRecycling = nodeRecycling;
                    candidate.ConstC        = const_C;
                    candidate.ConstD        = const_D;

                    return(candidate);
                }
            }

            // Every pooled node is in use. Without room to grow there is nothing to hand out.
            if (pooledObjects.Count >= maxPoolSize)
            {
                return(null);
            }

            // Grow the pool by one node, which is handed out immediately and therefore active.
            Opt_SP_UCTTreeNode created = new Opt_SP_UCTTreeNode(move, parent, state, rng, ucb1Tuned, rave, raveThreshold, nodeRecycling, const_C, const_D);
            created.SetActive = true;
            pooledObjects.Add(created);

            return(created);
        }
Beispiel #5
0
 /// <summary>
 /// Selects a simulation move. With probability <c>inertiaProbability</c> the first legal move
 /// whose <c>MoveList</c> is empty is returned; in every other case a random move is returned.
 /// </summary>
 /// <remarks>
 /// Every legal move inspected on the inertia path is cast to <c>SokobanPushMove</c>.
 /// NOTE(review): an empty MoveList presumably means the push needs no preceding player
 /// movement — confirm against SokobanPushMove.
 /// </remarks>
 /// <param name="gameState">State to select a move for.</param>
 /// <returns>The selected move.</returns>
 public IPuzzleMove selectMove(IPuzzleState gameState)
 {
     if (rnd.NextDouble() < inertiaProbability)
     {
         foreach (IPuzzleMove m in gameState.GetMoves())
         {
             SokobanPushMove push = (SokobanPushMove)m;
             if (push.MoveList.Count() == 0)
             {
                 return(m);
             }
         }
     }
     // Either inertia was not rolled or no move with an empty MoveList exists.
     return(gameState.GetRandomMove());
 }
 /// <summary>
 /// Creates a tree node for <paramref name="move"/> below <paramref name="parent"/> with zeroed
 /// statistics and no children.
 /// </summary>
 /// <param name="move">Move stored on this node.</param>
 /// <param name="parent">Parent node stored on this node.</param>
 /// <param name="state">State queried for the untried moves; only read when <paramref name="generateUntriedMoves"/> is true.</param>
 /// <param name="rng">Random number generator stored on the node.</param>
 /// <param name="const_C">Value stored as the node's const_C.</param>
 /// <param name="const_D">Value stored as the node's const_D.</param>
 /// <param name="generateUntriedMoves">When false, the untried-move list is left unassigned by this constructor.</param>
 public SP_UCTTreeNode(IPuzzleMove move, SP_UCTTreeNode parent, IPuzzleState state, MersenneTwister rng, double const_C = 1, double const_D = 20000, bool generateUntriedMoves = true)
 {
     // Position in the tree.
     this.move = move;
     this.parent = parent;
     childNodes = new List<SP_UCTTreeNode>();

     // Search configuration handed in by the caller.
     rnd = rng;
     this.const_C = const_C;
     this.const_D = const_D;

     // Statistics start from zero; topScore starts at the lowest possible value.
     wins = 0;
     visits = 0;
     squaredReward = 0;
     topScore = double.MinValue;

     if (!generateUntriedMoves)
     {
         return;
     }

     untriedMoves = state.GetMoves();
 }
Beispiel #7
0
        /// <summary>
        /// Epsilon-greedy move selection with a one-move lookahead.
        /// </summary>
        /// <remarks>
        /// When the random draw is not greater than <c>epsilon</c>, a random move is returned.
        /// Otherwise every legal move is applied to its own clone of <paramref name="gameState"/>,
        /// the clone's result is read, and one of the moves sharing the highest result is picked
        /// at random. <paramref name="gameState"/> itself has no move applied to it.
        /// </remarks>
        /// <param name="gameState">State to select a move for.</param>
        /// <returns>The selected move.</returns>
        public IPuzzleMove selectMove(IPuzzleState gameState)
        {
            if (rng.NextDouble() <= epsilon)
            {
                return(gameState.GetRandomMove());
            }

            double             maxReward = double.MinValue;
            List <IPuzzleMove> bestMoves = new List <IPuzzleMove>();

            foreach (IPuzzleMove move in gameState.GetMoves())
            {
                // One fresh clone per candidate, so no move is evaluated on a state that already
                // had another candidate applied, and no clone is created without being used.
                IPuzzleState clone = gameState.Clone();
                clone.DoMove(move);
                double result = clone.GetResult();
                if (result > maxReward)
                {
                    bestMoves.Clear();
                    bestMoves.Add(move);
                    maxReward = result;
                }
                else if (result == maxReward) // exact tie: keep every move sharing the best result
                {
                    bestMoves.Add(move);
                }
            }

            // No candidate was collected (e.g. no legal moves): defer to the state's random move
            // instead of indexing an empty list.
            if (bestMoves.Count == 0)
            {
                return(gameState.GetRandomMove());
            }
            return(bestMoves[rng.Next(bestMoves.Count)]);
        }
        /// <summary>
        /// Runs up to <paramref name="iterations"/> search iterations (select, expand, rollout,
        /// backpropagate) starting from <paramref name="rootState"/> and returns the move to play.
        /// </summary>
        /// <remarks>
        /// The loop ends early when a rollout reaches an end state with a new best score and
        /// <c>stopOnResult</c> is set, or when the root has neither children nor untried moves.
        /// <c>topScore</c> and <c>bestRollout</c> are not reset here, so they carry over between calls.
        /// NOTE(review): the <c>search</c> flag is presumably cleared from outside this method to
        /// request an abort — confirm against the rest of the class.
        /// </remarks>
        /// <param name="rootState">State the search starts from; it is cloned for every iteration.</param>
        /// <param name="iterations">Maximum number of iterations to execute.</param>
        /// <param name="maxTimeInMinutes">Not used by this method.</param>
        /// <returns>
        /// The first move of the best rollout (which is then removed from that rollout), or the root
        /// node's best move when there is no stored rollout; null when the <c>search</c> flag was
        /// found cleared at the end of an iteration.
        /// </returns>
        public IPuzzleMove Search(IPuzzleState rootState, int iterations, double maxTimeInMinutes = 5)
        {
            IterationsForFirstSolution = -1;
            nodeCount        = 0;
            nodesEliminated  = 0;
            nodesNotExpanded = 0;
            bool looped;

            if (!search)
            {
                search = true;
            }
            double minReward = double.MaxValue;
            double maxReward = double.MinValue;

            maxDepth = 0;
            // If needed clean the pool, restore all objects in the pool to the initial value
            if (objectPool.NeedToClean)
            {
                objectPool.CleanObjectPool();
            }

            ISPTreeNode rootNode = treeCreator.GenRootNode(rootState);
            // Ends of the LRU queue used by the node recycling optimization.
            ISPTreeNode head     = null;
            ISPTreeNode tail     = null;

            HashSet <IPuzzleMove> allFirstMoves  = new HashSet <IPuzzleMove>();
            List <IPuzzleMove>    currentRollout = new List <IPuzzleMove>();

            solutionHashes = new HashSet <int>();

#if PROFILING
            long beforeMemory = GC.GetTotalMemory(false);
            long afterMemory  = GC.GetTotalMemory(false);
            long usedMemory   = afterMemory - beforeMemory;
            long averageUsedMemoryPerIteration = 0;
#endif
            int deadlocksInTree = 0;
            int currentDepth    = 0;
            for (iterationsExecuted = 0; iterationsExecuted < iterations; iterationsExecuted++)
            {
                looped = false;
                ISPTreeNode  node  = rootNode;
                IPuzzleState state = rootState.Clone();
                // States seen in this iteration, used to detect cycles.
                HashSet <IPuzzleState> visitedStatesInRollout = new HashSet <IPuzzleState>()
                {
                    state.Clone()
                };

                // Clear lists of moves used for RAVE updates && best rollout
                solutionHash   = 27;
                currentRollout = new List <IPuzzleMove>();
                allFirstMoves.Clear();

                // Select: descend while the node is fully expanded and has children.
                while (!node.HasMovesToTry() && node.HasChildren())
                {
                    // UCB1-Tuned and RAVE Optimizations
                    node = node.SelectChild();
                    state.DoMove(node.Move);
                    visitedStatesInRollout.Add(state.Clone());
                    // RAVE Optimization && best rollout
                    currentRollout.Add(node.Move);
                    UpdateSolutionHash(node.Move);
                    allFirstMoves.Add(node.Move);

                    // Node Recycling Optimization
                    if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
                    {
                        // Non-leaf node removed from LRU queue during playout
                        if (node.NextLRUElem != null && node.PrevLRUElem != null)
                        {
                            LRUQueueManager.LRURemoveElement(ref node, ref head, ref tail);
                        }
                    }
                }
                // Snapshot of the state at the selected node, restored when an expansion is retried.
                IPuzzleState backupState = state.Clone();

                // A node without children and without untried moves is counted as a deadlock.
                if (!node.HasChildren() && !node.HasMovesToTry())
                {
                    deadlocksInTree++;
                }

                // Expand
                if (node.HasMovesToTry())
                {
                    IPuzzleMove move = node.SelectUntriedMove();
                    if (move != -1)
                    {
                        state.DoMove(move);

                        // Node Recycling Optimization
                        if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
                        {
                            if (memoryBudget == nodeCount && head != null)
                            {
                                head.ChildRecycle();
                                nodeCount--;
                                // Change LRU queue head when it becomes a leaf node
                                if (!head.HasChildren())
                                {
                                    LRUQueueManager.LRURemoveFirst(ref head, ref tail);
                                }
                            }
                        }

                        if (visitedStatesInRollout.Contains(state))
                        {
                            if (avoidCycles)
                            {
                                // Keep drawing untried moves until one leads to an unvisited state
                                // or no untried moves remain.
                                while (node.GetUntriedMoves().Count > 0 && visitedStatesInRollout.Contains(state))
                                {
                                    state = backupState.Clone();
                                    move  = node.GetUntriedMoves()[RNG.Next(node.GetUntriedMoves().Count)];
                                    state.DoMove(move);
                                    node.RemoveUntriedMove(move);
                                }
                                if (!visitedStatesInRollout.Contains(state)) //found valid move
                                {
                                    node = node.AddChild(objectPool, move, state);
                                    UpdateSolutionHash(move);
                                    currentRollout.Add(move);
                                    allFirstMoves.Add(move);
                                    nodeCount++;
                                }
                                else //all moves visited
                                {
                                    nodesNotExpanded++;
                                    state = backupState;
                                }
                            }
                            else
                            {
                                nodesNotExpanded++;
                                looped = true;
                            }
                        }
                        else
                        {
                            node = node.AddChild(objectPool, move, state);
                            // RAVE Optimization && best rollout
                            UpdateSolutionHash(move);
                            currentRollout.Add(move);
                            allFirstMoves.Add(move);
                            nodeCount++;
                        }
                        visitedStatesInRollout.Add(state.Clone());
                    }
                    else
                    {
                        state.Pass();
                    }
                }
                else
                {
                    nodesNotExpanded++;
                }


                // Rollout
                while (!state.isTerminal() && !looped)
                {
                    var move = state.GetSimulationMove();
                    backupState = state.Clone();
                    if (move != -1)
                    {
                        state.DoMove(move);
                        if (visitedStatesInRollout.Contains(state))
                        {
                            if (avoidCycles)
                            {
                                state = backupState.Clone();
                                List <IPuzzleMove> availableMoves = state.GetMoves();
                                while (availableMoves.Count > 0 && visitedStatesInRollout.Contains(state))
                                { //keep trying different moves until we end up in an unvisited state
                                    state = backupState.Clone();
                                    move  = availableMoves[RNG.Next(availableMoves.Count)];
                                    availableMoves.Remove(move);
                                    state.DoMove(move);
                                }
                                if (availableMoves.Count == 0 && visitedStatesInRollout.Contains(state))//all states have already been visited
                                {
                                    break;
                                }
                            }
                            else
                            {
                                looped = true;
                            }
                        }
                        // RAVE Optimization && best rollout
                        UpdateSolutionHash(move);
                        currentRollout.Add(move);
                        allFirstMoves.Add(move);
                        visitedStatesInRollout.Add(state.Clone());
                    }
                    else //simulation ended
                    {
                        break;
                    }
                }

                //Keep topScore and update bestRollout
                double result = state.GetResult();
                minReward = Math.Min(result, minReward);
                maxReward = Math.Max(result, maxReward);
                if (state.EndState() && !solutionHashes.Contains(solutionHash))
                {
                    solutionHashes.Add(solutionHash);
                    solutionCount++;
                    if (iterationsForFirstSolution < 0)
                    {
                        iterationsForFirstSolution = iterationsExecuted + 1;
                    }
                }
                // A rollout replaces the stored one when it scores higher, or ties with fewer moves.
                // A tie with no stored rollout yet also takes the current one instead of
                // dereferencing a null bestRollout.
                if (result > topScore || (result == topScore && (bestRollout == null || currentRollout.Count < bestRollout.Count)))
                {
                    topScore    = result;
                    bestRollout = currentRollout;
                    if (state.EndState() && stopOnResult)
                    {
                        // Count this iteration before leaving, since the loop increment is skipped.
                        iterationsExecuted++;
                        break;
                    }
                }


                // Backpropagate
                //TODO penalize score for loops?
                currentDepth = 0;
                while (node != null)
                {
                    ISPTreeNode parent = node.Parent;
                    //if a node is a dead end remove it from the tree
                    if (!node.HasChildren() && !node.HasMovesToTry() && !state.EndState() && useNodeElimination)
                    {
                        if (node.Parent == null)//unsolvable level. The tree has been completely explored. Return current best score
                        {
                            break;
                        }
                        node.Parent.RemoveChild(node);
                        nodeCount--;
                        nodesEliminated++;
                        // Cancels the increment below so an eliminated node does not count as depth.
                        currentDepth--;
                    }

                    // RAVE Optimization
                    node.Update(result, allFirstMoves);
                    node = parent;
                    currentDepth++;
                    // Node Recycling Optimization
                    if (((Opt_SP_UCTTreeNode)rootNode).NodeRecycling)
                    {
                        // Non-leaf node pushed back to LRU queue when updated
                        if (node != rootNode && node != null && node.HasChildren())
                        {
                            LRUQueueManager.LRUAddLast(ref node, ref head, ref tail);
                        }
                    }
                }

                maxDepth = Math.Max(maxDepth, currentDepth);

                // The root has nothing left to explore.
                if (!rootNode.HasChildren() && !rootNode.HasMovesToTry())
                {
                    break;
                }

                // The search flag was cleared while iterating: give up without a move.
                if (!search)
                {
                    search = true;
                    // The pool still holds this search's nodes; flag it so the next search cleans it,
                    // exactly as the regular exit path does.
                    objectPool.NeedToClean = true;
                    return(null);
                }

                #if PROFILING
                afterMemory = GC.GetTotalMemory(false);
                usedMemory  = afterMemory - beforeMemory;
                averageUsedMemoryPerIteration = usedMemory / (iterationsExecuted + 1);

                var outStringToWrite = string.Format(" optMCTS search: {0:0.00}% [{1} of {2}] - Total used memory B(MB): {3}({4:N7}) - Average used memory per iteration B(MB): {5}({6:N7})\n",
                                                     (float)((iterationsExecuted + 1) * 100) / (float)iterations, iterationsExecuted + 1, iterations, usedMemory, usedMemory / 1024 / 1024, averageUsedMemoryPerIteration,
                                                     (float)averageUsedMemoryPerIteration / 1024 / 1024);
                    #if DEBUG
                if (showMemoryUsage)
                {
                    Console.Write(outStringToWrite);
                    Console.SetCursorPosition(0, Console.CursorTop);
                }
                    #endif
                #endif
            }

            objectPool.NeedToClean = true;

            IPuzzleMove bestMove;
            if (bestRollout != null && bestRollout.Count > 0) //Remove first move from rollout so that if the topScore is not beaten we can just take the next move on the next search
            {
                bestMove = bestRollout[0];
                bestRollout.RemoveAt(0);
            }
            else
            {
                bestMove = rootNode.GetBestMove();
            }
            Debug.WriteLine(rootNode.TreeToString(0));
            Debug.WriteLine("Min Reward: " + minReward + " - Max Reward: " + maxReward);
            // Collect the per-node visit counts of the final tree, sorted ascending.
            visits     = new List <int>();
            raveVisits = new List <int>();
            CountVisits((Opt_SP_UCTTreeNode)rootNode, visits, raveVisits);

            visits.Sort((x, y) => (x.CompareTo(y)));
            raveVisits.Sort((x, y) => (x.CompareTo(y)));
            return(bestMove);
        }