/// <summary>
/// Builds a tree node with empty statistics, no children and no LRU links.
/// </summary>
/// <param name="move">Move stored in <c>Move</c>.</param>
/// <param name="parent">Node stored as this node's parent.</param>
/// <param name="state">State queried for moves; only read when <paramref name="generateUntriedMoves"/> is true.</param>
/// <param name="rng">Random number generator stored in <c>rnd</c>.</param>
/// <param name="ucb1Tuned">Value stored in the <c>ucb1Tuned</c> field.</param>
/// <param name="rave">Value stored in the <c>rave</c> field.</param>
/// <param name="raveThreshold">Value stored in the <c>raveThreshold</c> field.</param>
/// <param name="nodeRecycling">Value stored in the <c>nodeRecycling</c> field.</param>
/// <param name="const_C">Value stored in the <c>const_C</c> field.</param>
/// <param name="const_D">Value stored in the <c>const_D</c> field.</param>
/// <param name="generateUntriedMoves">
/// When true, <c>untriedMoves</c> is filled from <c>state.GetMoves()</c>;
/// when false this constructor does not assign <c>untriedMoves</c> at all.
/// </param>
public Opt_SP_UCTTreeNode(
    IPuzzleMove move,
    Opt_SP_UCTTreeNode parent,
    IPuzzleState state,
    MersenneTwister rng,
    bool ucb1Tuned,
    bool rave,
    double raveThreshold,
    bool nodeRecycling,
    double const_C = 1,
    double const_D = 20000,
    bool generateUntriedMoves = true)
{
    // Identity of the node and the exploration constants.
    Move = move;
    this.parent = parent;
    this.const_C = const_C;
    this.const_D = const_D;
    rnd = rng;

    // Tree / LRU-queue links start out empty and the node is inactive.
    childNodes = new List<Opt_SP_UCTTreeNode>();
    NextLRUElem = null;
    PrevLRUElem = null;
    SetActive = false;

    // Every statistic starts from zero; the best score starts at the lowest double.
    wins = 0;
    visits = 0;
    squares_rewards = 0;
    RAVEwins = 0;
    RAVEvisits = 0;
    squaredReward = 0;
    topScore = double.MinValue;

    // Optimisation switches.
    this.ucb1Tuned = ucb1Tuned;
    this.rave = rave;
    this.raveThreshold = raveThreshold;
    this.nodeRecycling = nodeRecycling;

    if (!generateUntriedMoves)
    {
        return;
    }

    untriedMoves = state.GetMoves();
}
/// <summary>
/// Chooses a move at random, preferring moves whose board cell does not hold
/// <c>selectedColor</c>.
/// </summary>
/// <param name="gameState">State whose moves and board are inspected.</param>
/// <returns>
/// A randomly indexed entry of the filtered move list, or of the full move list
/// when the filter removes everything or the epsilon branch is taken.
/// </returns>
public IPuzzleMove selectMove(IPuzzleState gameState)
{
    List<IPuzzleMove> candidates = gameState.GetMoves();

    // Epsilon greedy: on a very small fraction of calls skip the colour filter.
    bool skipFilter = rnd.NextDouble() <= 0.00007;
    if (skipFilter)
    {
        int anyIndex = rnd.Next(candidates.Count);
        return candidates[anyIndex];
    }

    // Drop every move that points at a cell of the selected colour.
    candidates.RemoveAll(candidate =>
        gameState.GetBoard(SamegameGameMove.GetX(candidate), SamegameGameMove.GetY(candidate)) == selectedColor);

    // Nothing survived the filter: start again from the complete move list.
    if (candidates.Count == 0)
    {
        candidates = gameState.GetMoves();
    }

    int chosenIndex = rnd.Next(candidates.Count);
    return candidates[chosenIndex];
}
/// <summary>
/// Chooses a simulation move: with probability <c>epsilon</c> a randomly indexed
/// legal move, otherwise the first move of the plan returned by <c>idaStar.Solve</c>.
/// </summary>
/// <param name="gameState">State to select a move for.</param>
/// <returns>
/// The selected move, or <c>(IPuzzleMove)(-1)</c> when the solver returns an empty plan.
/// </returns>
public IPuzzleMove selectMove(IPuzzleState gameState)
{
    if (RNG.NextDouble() < epsilon)
    {
        // Generate the move list once; it is needed both for its size and for the lookup.
        var moves = gameState.GetMoves();
        return moves[RNG.Next(moves.Count)];
    }

    // The plan is stored in the moveList field; only its first move is returned here.
    moveList = idaStar.Solve(gameState, maxNodes, tableSize, maxDepth);
    if (moveList.Count > 0)
    {
        return moveList[0];
    }

    // Sentinel used by this code base for "no move available".
    return (IPuzzleMove)(-1);
}
/// <summary>
/// Hands out a node from the pool, marked active and configured with the given arguments.
/// The first inactive pooled node is reused; only the members assigned below are overwritten,
/// any other values the node holds are left as they are. If every pooled node is active and
/// the pool is still smaller than <c>maxPoolSize</c>, a new node is constructed and added to the pool.
/// The caller is expected to set the node inactive again when it is done with it.
/// </summary>
/// <returns>
/// The reused or newly created node, or null when no node is inactive and the pool
/// has already reached <c>maxPoolSize</c>.
/// </returns>
public Opt_SP_UCTTreeNode GetObject(IPuzzleMove move, Opt_SP_UCTTreeNode parent, IPuzzleState state, MersenneTwister rng, bool ucb1Tuned, bool rave, double raveThreshold, bool nodeRecycling, double const_C, double const_D)
{
    // First choice: recycle the first pooled node that is not in use.
    foreach (Opt_SP_UCTTreeNode candidate in pooledObjects)
    {
        if (!candidate.SetActive)
        {
            // Claim it before configuring it.
            candidate.SetActive = true;

            candidate.Move = move;
            candidate.Parent = parent;
            candidate.untriedMoves = state.GetMoves();
            candidate.Rnd = rng;
            candidate.Ucb1Tuned = ucb1Tuned;
            candidate.Rave = rave;
            candidate.RaveThreshold = raveThreshold;
            candidate.NodeRecycling = nodeRecycling;
            candidate.ConstC = const_C;
            candidate.ConstD = const_D;

            return candidate;
        }
    }

    // Every pooled node is busy; the pool may only grow while it is below its maximum size.
    bool poolIsFull = maxPoolSize <= pooledObjects.Count;
    if (poolIsFull)
    {
        return null;
    }

    // Grow the pool by one node that is handed out immediately, hence active.
    Opt_SP_UCTTreeNode created = new Opt_SP_UCTTreeNode(move, parent, state, rng, ucb1Tuned, rave, raveThreshold, nodeRecycling, const_C, const_D);
    created.SetActive = true;
    pooledObjects.Add(created);
    return created;
}
/// <summary>
/// With probability <c>inertiaProbability</c> returns the first push move whose
/// <c>MoveList</c> is empty; in every other case returns <c>gameState.GetRandomMove()</c>.
/// </summary>
/// <param name="gameState">
/// State to select a move for. It is cast to <c>SokobanGameState</c> and its moves to
/// <c>SokobanPushMove</c> whenever the inertia branch is taken.
/// </param>
/// <returns>The selected move.</returns>
public IPuzzleMove selectMove(IPuzzleState gameState)
{
    bool tryInertia = rnd.NextDouble() < inertiaProbability;
    if (tryInertia)
    {
        List<IPuzzleMove> candidates = gameState.GetMoves();

        // The cast result is not used afterwards; the cast itself is kept as in the original.
        SokobanGameState sokobanState = (SokobanGameState)gameState;

        foreach (IPuzzleMove candidate in candidates)
        {
            SokobanPushMove pushMove = (SokobanPushMove)candidate;
            if (pushMove.MoveList.Count() != 0)
            {
                continue;
            }

            // First push with an empty MoveList wins.
            // NOTE(review): presumably a push that needs no walking first - confirm against SokobanPushMove.
            return candidate;
        }
    }

    // Inertia not attempted, or no push with an empty MoveList was found.
    return gameState.GetRandomMove();
}
/// <summary>
/// Builds a tree node with empty statistics and no children.
/// </summary>
/// <param name="move">Move stored in the <c>move</c> field.</param>
/// <param name="parent">Node stored as this node's parent.</param>
/// <param name="state">State queried for moves; only read when <paramref name="generateUntriedMoves"/> is true.</param>
/// <param name="rng">Random number generator stored in <c>rnd</c>.</param>
/// <param name="const_C">Value stored in the <c>const_C</c> field.</param>
/// <param name="const_D">Value stored in the <c>const_D</c> field.</param>
/// <param name="generateUntriedMoves">
/// When true, <c>untriedMoves</c> is filled from <c>state.GetMoves()</c>;
/// when false this constructor does not assign <c>untriedMoves</c> at all.
/// </param>
public SP_UCTTreeNode(
    IPuzzleMove move,
    SP_UCTTreeNode parent,
    IPuzzleState state,
    MersenneTwister rng,
    double const_C = 1,
    double const_D = 20000,
    bool generateUntriedMoves = true)
{
    // Identity of the node and the exploration constants.
    this.move = move;
    this.parent = parent;
    this.const_C = const_C;
    this.const_D = const_D;
    rnd = rng;

    // No children yet; statistics start from zero and the best score at the lowest double.
    childNodes = new List<SP_UCTTreeNode>();
    wins = 0;
    visits = 0;
    squaredReward = 0;
    topScore = double.MinValue;

    if (!generateUntriedMoves)
    {
        return;
    }

    untriedMoves = state.GetMoves();
}
/// <summary>
/// Epsilon-greedy one-ply lookahead. When the random draw exceeds <c>epsilon</c>, every
/// move is applied to its own clone of <paramref name="gameState"/> and one of the moves
/// with the highest <c>GetResult()</c> is returned, ties broken at random. Otherwise, or
/// when the lookahead yields no candidate, <c>gameState.GetRandomMove()</c> is returned.
/// </summary>
/// <param name="gameState">State to select a move for; it is never modified, only cloned.</param>
/// <returns>The selected move.</returns>
public IPuzzleMove selectMove(IPuzzleState gameState)
{
    if (rng.NextDouble() > epsilon)
    {
        double maxReward = double.MinValue;
        List<IPuzzleMove> bestMoves = new List<IPuzzleMove>();

        foreach (IPuzzleMove move in gameState.GetMoves())
        {
            // Evaluate each move on a fresh clone so the evaluations are independent.
            IPuzzleState next = gameState.Clone();
            next.DoMove(move);
            double result = next.GetResult();

            if (result > maxReward)
            {
                // Strictly better: restart the list of tied best moves.
                bestMoves.Clear();
                bestMoves.Add(move);
                maxReward = result;
            }
            else if (result == maxReward)
            {
                // Exact tie with the current best result.
                bestMoves.Add(move);
            }
        }

        if (bestMoves.Count > 0)
        {
            return bestMoves[rng.Next(bestMoves.Count)];
        }

        // No move was available (or no result was comparable); indexing the empty list
        // would throw, so defer to the state's own random-move handling instead.
    }

    return gameState.GetRandomMove();
}
/// <summary>
/// Runs the tree search from <paramref name="rootState"/>: each iteration selects a path through
/// the tree, expands one node, plays a rollout, and backpropagates the rollout result.
/// Search counters (node count, eliminated / not expanded nodes, max depth, iterations for the
/// first solution) are reset at the start and updated while searching.
/// </summary>
/// <param name="rootState">State the search starts from; it is cloned for every iteration.</param>
/// <param name="iterations">Upper bound on the number of iterations executed.</param>
/// <param name="maxTimeInMinutes">
/// NOTE(review): this parameter is not read anywhere in this method - confirm whether a time limit
/// is supposed to be enforced here.
/// </param>
/// <returns>
/// The first move of <c>bestRollout</c> (which is then removed from it) when a non-empty best rollout
/// exists, otherwise <c>rootNode.GetBestMove()</c>. Returns null when the <c>search</c> flag is found
/// cleared at the end of an iteration.
/// </returns>
public IPuzzleMove Search(IPuzzleState rootState, int iterations, double maxTimeInMinutes = 5)
{
    // Reset the per-search counters.
    IterationsForFirstSolution = -1;
    nodeCount = 0;
    nodesEliminated = 0;
    nodesNotExpanded = 0;
    bool looped;
    if (!search)
    {
        search = true;
    }
    double minReward = double.MaxValue;
    double maxReward = double.MinValue;
    maxDepth = 0;
    // If needed clean the pool, restore all objects in the pool to the initial value
    if (objectPool.NeedToClean)
    {
        objectPool.CleanObjectPool();
    }
    ISPTreeNode rootNode = treeCreator.GenRootNode(rootState);
    // Head and tail of the LRU queue used by the node recycling optimization.
    ISPTreeNode head = null;
    ISPTreeNode tail = null;
    HashSet <IPuzzleMove> allFirstMoves = new HashSet <IPuzzleMove>();
    List <IPuzzleMove> currentRollout = new List <IPuzzleMove>();
    solutionHashes = new HashSet <int>();
    #if PROFILING
    long beforeMemory = GC.GetTotalMemory(false);
    long afterMemory = GC.GetTotalMemory(false);
    long usedMemory = afterMemory - beforeMemory;
    long averageUsedMemoryPerIteration = 0;
    #endif
    // NOTE(review): deadlocksInTree is incremented below but never read in this method.
    int deadlocksInTree = 0;
    int currentDepth = 0;
    for (iterationsExecuted = 0; iterationsExecuted < iterations; iterationsExecuted++)
    {
        looped = false;
        ISPTreeNode node = rootNode;
        IPuzzleState state = rootState.Clone();
        //Debug.WriteLine(node.TreeToString(0));
        // States seen in this iteration, used to detect cycles.
        // NOTE(review): membership relies on IPuzzleState equality/hashing - confirm the implementations override them.
        HashSet <IPuzzleState> visitedStatesInRollout = new HashSet <IPuzzleState>() { state.Clone() };
        // Clear lists of moves used for RAVE updates && best rollout
        solutionHash = 27;
        currentRollout = new List <IPuzzleMove>();
        allFirstMoves.Clear();
        // Select: descend while the node has no untried moves left but does have children.
        while (!node.HasMovesToTry() && node.HasChildren())
        {
            // UCB1-Tuned and RAVE Optimizations
            node = node.SelectChild();
            state.DoMove(node.Move);
            visitedStatesInRollout.Add(state.Clone());
            // RAVE Optimization && best rollout
            currentRollout.Add(node.Move);
            UpdateSolutionHash(node.Move);
            allFirstMoves.Add(node.Move);
            // Node Recycling Optimization
            // NOTE(review): the cast assumes every node in the tree is an Opt_SP_UCTTreeNode.
            if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
            {
                // Non-leaf node removed from LRU queue during playout
                if (node.NextLRUElem != null && node.PrevLRUElem != null)
                {
                    LRUQueueManager.LRURemoveElement(ref node, ref head, ref tail);
                }
            }
        }
        // Snapshot of the state at the selected node, used to retry other moves on a cycle.
        IPuzzleState backupState = state.Clone();
        if (!node.HasChildren() && !node.HasMovesToTry())
        {
            deadlocksInTree++;
        }
        else
        {
            // Writes an empty string; has no visible effect.
            Debug.Write("");
        }
        // Expand
        if (node.HasMovesToTry())
        {
            IPuzzleMove move = node.SelectUntriedMove();
            // -1 is the "no move" sentinel.
            if (move != -1)
            {
                state.DoMove(move);
                // Node Recycling Optimization
                if (((Opt_SP_UCTTreeNode)node).NodeRecycling)
                {
                    // When the node count has reached the memory budget, recycle a child of the LRU head.
                    if (memoryBudget == nodeCount && head != null)
                    {
                        head.ChildRecycle();
                        nodeCount--;
                        // Change LRU queue head when it becomes a leaf node
                        if (!head.HasChildren())
                        {
                            LRUQueueManager.LRURemoveFirst(ref head, ref tail);
                        }
                    }
                }
                if (visitedStatesInRollout.Contains(state))
                {
                    // The move leads to a state already seen in this iteration.
                    if (avoidCycles)
                    {
                        // Draw other untried moves at random (removing each one drawn) until one
                        // leads to an unseen state or none are left.
                        while (node.GetUntriedMoves().Count > 0 && visitedStatesInRollout.Contains(state))
                        {
                            state = backupState.Clone();
                            move = node.GetUntriedMoves()[RNG.Next(node.GetUntriedMoves().Count)];
                            state.DoMove(move);
                            node.RemoveUntriedMove(move);
                        }
                        if (!visitedStatesInRollout.Contains(state)) //found valid move
                        {
                            node = node.AddChild(objectPool, move, state);
                            UpdateSolutionHash(move);
                            currentRollout.Add(move);
                            allFirstMoves.Add(move);
                            nodeCount++;
                        }
                        else //all moves visited
                        {
                            nodesNotExpanded++;
                            // NOTE(review): assigned without Clone(), so state and backupState refer to
                            // the same object until backupState is reassigned in the rollout loop.
                            state = backupState;
                        }
                    }
                    else
                    {
                        // Cycle detected and not avoided: skip the rollout for this iteration.
                        nodesNotExpanded++;
                        looped = true;
                    }
                }
                else
                {
                    node = node.AddChild(objectPool, move, state);
                    // RAVE Optimization && best rollout
                    UpdateSolutionHash(move);
                    currentRollout.Add(move);
                    allFirstMoves.Add(move);
                    nodeCount++;
                }
                visitedStatesInRollout.Add(state.Clone());
            }
            else
            {
                state.Pass();
            }
        }
        else
        {
            nodesNotExpanded++;
        }
        // Rollout: play simulation moves until the state is terminal or a cycle was flagged.
        while (!state.isTerminal() && !looped)
        {
            var move = state.GetSimulationMove();
            backupState = state.Clone();
            if (move != -1)
            {
                state.DoMove(move);
                if (visitedStatesInRollout.Contains(state))
                {
                    if (avoidCycles)
                    {
                        state = backupState.Clone();
                        List <IPuzzleMove> availableMoves = state.GetMoves();
                        while (availableMoves.Count > 0 && visitedStatesInRollout.Contains(state))
                        {
                            //keep trying different moves until we end up in an unvisited state
                            state = backupState.Clone();
                            move = availableMoves[RNG.Next(availableMoves.Count)];
                            availableMoves.Remove(move);
                            state.DoMove(move);
                        }
                        if (availableMoves.Count == 0 && visitedStatesInRollout.Contains(state))//all states have already been visited
                        {
                            // The last tried move stays applied to state, but is not recorded in the rollout.
                            break;
                        }
                    }
                    else
                    {
                        // The looping move is still recorded below; the loop condition then ends the rollout.
                        looped = true;
                    }
                }
                // RAVE Optimization && best rollout
                UpdateSolutionHash(move);
                currentRollout.Add(move);
                allFirstMoves.Add(move);
                visitedStatesInRollout.Add(state.Clone());
            }
            else //simulation ended
            {
                break;
                //state.Pass();
            }
        }
        //Keep topScore and update bestRollout
        double result = state.GetResult();
        minReward = Math.Min(result, minReward);
        maxReward = Math.Max(result, maxReward);
        // Count each distinct solution (by rollout hash) once and remember when the first one appeared.
        if (state.EndState() && !solutionHashes.Contains(solutionHash))
        {
            solutionHashes.Add(solutionHash);
            solutionCount++;
            if (iterationsForFirstSolution < 0)
            {
                iterationsForFirstSolution = iterationsExecuted + 1;
            }
        }
        // A rollout is better when it scores higher, or scores the same with fewer moves.
        // NOTE(review): bestRollout.Count is read when result == topScore; bestRollout is null-checked
        // after the loop, so it may be null here - verify its initial value.
        if (result > topScore || result == topScore && currentRollout.Count < bestRollout.Count)
        {
            topScore = result;
            bestRollout = currentRollout;
            if (state.EndState() && stopOnResult)
            {
                // Count this iteration before leaving, since the loop increment is skipped by break.
                iterationsExecuted++;
                break;
            }
        }
        // Backpropagate: walk from the expanded node up to the root.
        currentDepth = 0;
        while (node != null)
        {
            if (looped)
            {
                //TODO penalize score for loops?
            }
            // Remember the parent before the node is possibly detached from it.
            ISPTreeNode parent = node.Parent;
            //if a node is a dead end remove it from the tree
            if (!node.HasChildren() && !node.HasMovesToTry() && !state.EndState() && useNodeElimination)
            {
                if (node.Parent == null)//unsolvable level. The tree has been completely explored. Return current best score
                {
                    //SinglePlayerMCTSMain.Log("Unsolvable Level");
                    //Console.WriteLine("\nUnsolvable Level");
                    // Leaves only the backpropagation loop; the root check below ends the search.
                    break;
                }
                node.Parent.RemoveChild(node);
                nodeCount--;
                nodesEliminated++;
                // Cancels the increment below so eliminated nodes do not count towards the depth.
                currentDepth--;
            }
            // RAVE Optimization
            node.Update(result, allFirstMoves);
            node = parent;
            currentDepth++;
            // Node Recycling Optimization
            if (((Opt_SP_UCTTreeNode)rootNode).NodeRecycling)
            {
                // Non-leaf node pushed back to LRU queue when updated
                if (node != rootNode && node != null && node.HasChildren())
                {
                    LRUQueueManager.LRUAddLast(ref node, ref head, ref tail);
                }
            }
        }
        maxDepth = Math.Max(maxDepth, currentDepth);
        // Root has neither children nor untried moves: nothing is left to search.
        if (!rootNode.HasChildren() && !rootNode.HasMovesToTry())
        {
            break;
        }
        // NOTE(review): search is presumably cleared from outside this method to abort the search - confirm.
        // On abort the flag is re-armed and null is returned without the post-processing below.
        if (!search)
        {
            search = true;
            return(null);
        }
        #if PROFILING
        // NOTE(review): 'i' is not declared anywhere in this method; the loop counter is
        // iterationsExecuted, so this block looks like it cannot compile with PROFILING defined - confirm.
        afterMemory = GC.GetTotalMemory(false);
        usedMemory = afterMemory - beforeMemory;
        averageUsedMemoryPerIteration = usedMemory / (i + 1);
        var outStringToWrite = string.Format(" optMCTS search: {0:0.00}% [{1} of {2}] - Total used memory B(MB): {3}({4:N7}) - Average used memory per iteration B(MB): {5}({6:N7})\n", (float)((i + 1) * 100) / (float)iterations, i + 1, iterations, usedMemory, usedMemory / 1024 / 1024, averageUsedMemoryPerIteration, (float)averageUsedMemoryPerIteration / 1024 / 1024);
        #if DEBUG
        if (showMemoryUsage)
        {
            Console.Write(outStringToWrite);
            Console.SetCursorPosition(0, Console.CursorTop);
        }
        #endif
        #endif
        //Console.WriteLine(rootNode.TreeToString(0));
    }
    //Console.WriteLine();
    // Mark the pool so the next Search call cleans it before building a new tree.
    objectPool.NeedToClean = true;
    //#if DEBUG
    // Console.WriteLine(rootNode.ChildrenToString());
    // Console.WriteLine(rootNode.TreeToString(0));
    //#endif
    IPuzzleMove bestMove;
    if (bestRollout != null && bestRollout.Count > 0) //Remove first move from rollout so that if the topScore is not beaten we can just take the next move on the next search
    {
        bestMove = bestRollout[0];
        bestRollout.RemoveAt(0);
    }
    else
    {
        bestMove = rootNode.GetBestMove();
    }
    Debug.WriteLine(rootNode.TreeToString(0));
    Debug.WriteLine("Min Reward: " + minReward + " - Max Reward: " + maxReward);
    // Collect the visit and RAVE-visit counts of the tree and sort them ascending.
    visits = new List <int>();
    raveVisits = new List <int>();
    CountVisits((Opt_SP_UCTTreeNode)rootNode, visits, raveVisits);
    visits.Sort((x, y) => (x.CompareTo(y)));
    raveVisits.Sort((x, y) => (x.CompareTo(y)));
    //string visitsString = LogVisits((Opt_SP_UCTTreeNode) rootNode);
    //SinglePlayerMCTSMain.Log("Iterations: "+IterationsExecuted+" NodeCount: " + nodeCount+" "+visitsString);
    return(bestMove);
}