/// <summary>
/// Makes the agent act once and updates the maze cell views to reflect the move.
/// </summary>
private void ActAgent()
{
    // Copy the agent's location before it moves; the agent's own location changes in Act().
    var origin = new Maze.CellLocate(Agent.CurrentLocate);

    // The cell being left is drawn as part of the path.
    MazeCellViews[origin.X, origin.Y].CellCssClass = CSS_CLASS_PATH;

    double qValue = Agent.Act();

    // Highlight the cell the agent now occupies.
    MazeCellViews[Agent.CurrentLocate.X, Agent.CurrentLocate.Y].CellCssClass = "bg-success";

    // Only a strictly positive value returned by Act() updates the cell label.
    // Written as a negated ">" so that NaN is skipped as well.
    if (!(qValue > 0))
    {
        return;
    }

    Maze.Direction bestDirection = Agent.GetMaxQDirection(origin);

    // Index the grids directly on every access: the element types are not visible here,
    // and writing through a local copy would be lost if they are value types.
    EnvMaze.Cells[origin.X, origin.Y].Text = DirectionSet[bestDirection];

    if (EnvMaze.Cells[origin.X, origin.Y].IsStart)
    {
        // The start cell's view text is left as it is.
        return;
    }

    MazeCellViews[origin.X, origin.Y].Text = DirectionSet[bestDirection];
}
/// <summary>
/// Drives one update of the game: while the game is running it refreshes the views and
/// advances the game state by one step; once the game state reports the end of the game
/// it records the winner, destroys the losing player's object and opens the end-game menu.
/// </summary>
public void Update()
{
    // Nothing to do until a game has been started.
    if (!gameIsStart)
    {
        return;
    }

    if (!gs.EndOfGame)
    {
        // Refresh every view from the current state before advancing it.
        SyncNumbersOfProjectiles();
        SyncProjectilesView();
        SyncPlayersView();
        SyncItemsView();
        SyncTimerView();

        // Each agent picks its action for its own player index, then the rules advance the state.
        GameStateRules.Step(ref gs, agent1.Act(ref gs, 0), agent2.Act(ref gs, 1));
        GameStateRules.UpdateItems(ref gs);
        return;
    }

    // Game over: the player with more life stock wins; equal stocks mean no winner (-1).
    var firstStock = gs.players[0].PlayerLifeStock;
    var secondStock = gs.players[1].PlayerLifeStock;

    if (firstStock > secondStock)
    {
        winnerId = 0;
        Destroy(players[1].gameObject);
    }
    else if (firstStock < secondStock)
    {
        winnerId = 1;
        Destroy(players[0].gameObject);
    }
    else
    {
        winnerId = -1;
    }

    // Stop updating and show the end-game menu.
    gameIsStart = false;
    menuController.ActiveMenu(Menus.Menus.END_GAME_MENU);
}
/// <summary>
/// Fills the control values for this player, either from the input axes (human player)
/// or from the agent's decision (cpu player).
/// </summary>
/// <param name="h">Receives the "Horizontal" axis value, or the agent's "horizontal" output.</param>
/// <param name="v">Receives the "Vertical" axis value; left untouched for a cpu player.</param>
/// <param name="s">Receives the "Sprint" axis value, or the agent's "sprint" output.</param>
/// <param name="j">Receives the "Jump" axis value, or the agent's "jump" output.</param>
/// <param name="k">Receives the "Kick" axis value; left untouched for a cpu player.</param>
/// <param name="l">Receives the "Lift" axis value; left untouched for a cpu player.</param>
/// <param name="st">Receives the "Stop" axis value; left untouched for a cpu player.</param>
void GetInput(ref float h, ref float v, ref float s, ref float j, ref float k, ref float l, ref float st)
{
    // Human player: read every axis, suffixed with this player's controller name.
    if (!cpu)
    {
        h = Input.GetAxisRaw("Horizontal" + controllerName);
        v = Input.GetAxisRaw("Vertical" + controllerName);
        s = Input.GetAxisRaw("Sprint" + controllerName);
        j = Input.GetAxisRaw("Jump" + controllerName);
        k = Input.GetAxisRaw("Kick" + controllerName);
        l = Input.GetAxisRaw("Lift" + controllerName);
        st = Input.GetAxisRaw("Stop" + controllerName);
        return;
    }

    // AI player: describe the current state of the game to the agent.
    Dictionary<string, double> state = new Dictionary<string, double>
    {
        { "selfPosX", transform.position.x },
        { "selfPosY", transform.position.y },
        { "otherPosX", otherPlayer.transform.position.x },
        { "otherPosY", otherPlayer.transform.position.y },
        { "ballPosX", gameBall.transform.position.x },
        { "ballPosY", gameBall.transform.position.y },
        { "ballVelX", gameBall.velocity.x },
        { "ballVelY", gameBall.velocity.y }
    };

    // Filled in by the agent with whichever outputs it decides to set.
    Dictionary<string, double> data = new Dictionary<string, double>();

    agent.Act(state, data);

    // A single TryGetValue per output avoids the ContainsKey + indexer double lookup.
    // Outputs the agent did not set default to 0.
    double value;
    h = data.TryGetValue("horizontal", out value) ? (float)value : 0;
    j = data.TryGetValue("jump", out value) ? (float)value : 0;
    s = data.TryGetValue("sprint", out value) ? (float)value : 0;
}
/// <summary>
/// Trains two agents to compete in an adversarial game by letting them alternate moves
/// (agent one on the black successors, agent two on the white successors) until the
/// combined number of victories and draws reaches <paramref name="practiceGames"/>.
/// Afterwards both agents are switched to compete mode.
/// </summary>
/// <param name="practiceGames">Number of practice games to play before training stops.</param>
/// <param name="showOutput">When true, the game count and a progress bar are written to the console.</param>
/// <param name="agentOne">Agent that moves first.</param>
/// <param name="agentTwo">Agent that moves second.</param>
public static void TrainZeroSum(long practiceGames, bool showOutput, Agent agentOne, Agent agentTwo)
{
    // Report the number of practice games
    if (showOutput)
    {
        Console.WriteLine($"Number of Practice Games: {practiceGames}");
    }

    // Create starting state
    CheckerState checkerState = new CheckerState();

    // Put agents in training mode
    agentOne.TrainingMode(1.0);
    agentTwo.TrainingMode(1.0);

    // One progress tick per 1% of the practice games. Clamped to at least 1 so that
    // fewer than 100 practice games no longer divides by zero in the modulo below.
    long gamesPerTick = Math.Max(1L, practiceGames / 100);
    int ticks = 0;
    bool dotPrinted = false;

    // A finished game (either side winning) allows the next progress tick to be recorded.
    Action<object, EventArgs> resetDotPrinted = (object sender, EventArgs e) => dotPrinted = false;

    // Records progress after a move and reports whether enough games have been played.
    // Shared by both agents' turns so the bookkeeping exists only once.
    Func<bool> recordProgress = () =>
    {
        long games = agentOne.Victories + agentTwo.Victories + agentOne.Draws;

        // Account for every 1% of progress, at most once per finished game
        if (games % gamesPerTick == 0 && !dotPrinted)
        {
            // Update competitiveness at quarters
            if (ticks % 25 == 0)
            {
                // Display quarter in progress bar
                if (showOutput)
                {
                    Console.Write($"[{ticks}]");
                }

                // Make each agent more competitive: the value passed to TrainingMode
                // shrinks from 1.0 as ticks accumulate.
                agentOne.TrainingMode((100.0 - ticks) / 100.0);
                agentTwo.TrainingMode((100.0 - ticks) / 100.0);
            }

            ticks += 1;

            // Don't include hundredth tick
            if (ticks < 100 && showOutput)
            {
                Console.Write(".");
            }

            dotPrinted = true;
        }

        return games >= practiceGames;
    };

    CheckerState.WhiteWins += resetDotPrinted;
    CheckerState.BlackWins += resetDotPrinted;

    try
    {
        // Begin training progress bar
        if (showOutput)
        {
            Console.Write("[ ");
        }

        // Watch each agent evolve
        bool enoughTraining = false;
        while (!enoughTraining)
        {
            // Train the first agent
            checkerState = agentOne.Act(checkerState.SuccessorsBlack) as CheckerState;
            checkerState.GoalTest();
            enoughTraining = recordProgress();

            // Train the second agent, unless the practice quota has just been met
            if (!enoughTraining)
            {
                checkerState = agentTwo.Act(checkerState.SuccessorsWhite) as CheckerState;
                checkerState.GoalTest();
                enoughTraining = recordProgress();
            }
        }

        // End progress bar
        if (showOutput)
        {
            Console.WriteLine(" ]");
            Console.WriteLine("\n");
        }
    }
    finally
    {
        // The events are static: without this, every call would leave another handler
        // (and its captured locals) attached for the lifetime of the process.
        CheckerState.WhiteWins -= resetDotPrinted;
        CheckerState.BlackWins -= resetDotPrinted;
    }

    // Put agents in competitive mode
    agentOne.CompeteMode();
    agentTwo.CompeteMode();
}
/// <summary>
/// Coroutine that runs <paramref name="agent"/> in <paramref name="env"/> for
/// <paramref name="episodeCount"/> episodes, yielding once after every episode.
/// In training mode the agent also observes each transition, trains on every step and
/// has <c>TransferLearning</c> called every <c>EpisodesPerTransfer</c> episodes.
/// </summary>
/// <param name="agent">Agent that selects actions and, when training, learns from them.</param>
/// <param name="env">Environment the agent acts in.</param>
/// <param name="episodeCount">Number of episodes to run.</param>
/// <param name="minEpsillon">Lower bound for the epsilon value passed to the agent; used directly when not training.</param>
/// <param name="isTraining">Whether the agent observes and trains while playing.</param>
IEnumerator Play(Agent agent, DQN.Environment env, int episodeCount = 100, float minEpsillon = 0.05f, bool isTraining = true)
{
    var actionSize = env.GetActionSize();
    var rewardQueue = new Queue<float>(100);
    var epsilon = isTraining ? 1.0f : minEpsillon;

    for (int episode = 0; episode < episodeCount; ++episode)
    {
        env.Reset();

        // While training, epsilon decays quadratically with episode progress,
        // never dropping below the configured minimum.
        if (isTraining)
        {
            epsilon = Mathf.Max(minEpsillon, 1.0f - (float)Math.Pow((float)episode / episodeCount, 2.0f));
        }

        float episodeReward = 0.0f;
        var currentState = Array.ConvertAll<int, float>(env.GetCurrentState(), x => Convert.ToSingle(x));
        List<int> actions = new List<int>(MaxSteps);

        // Debug aid: dump the Q-values of the initial state every 500th episode
        // (only when the step loop below would actually run).
        if (MaxSteps > 0 && episode % 500 == 0)
        {
            foreach (var q in agent.GetLocalQValues(currentState, m_device))
            {
                print("QVAL: " + episode + " " + q);
            }
        }

        for (int step = 0; step < MaxSteps; step++)
        {
            var action = agent.Act(currentState, epsilon, actionSize, m_device, !isTraining);
            actions.Add(action);

            float reward = 0.0f;
            bool isFinished = env.Act((DQN.Environment.Actions)action, out reward);
            episodeReward += reward;

            var nextState = Array.ConvertAll<int, float>(env.GetCurrentState(), x => Convert.ToSingle(x));

            if (isTraining)
            {
                // Record the transition (terminal flag encoded as 1.0 / 0.0), then train on a batch.
                agent.Observe(currentState, (float)action, reward, nextState, isFinished ? 1.0f : 0.0f);
                agent.Train(BatchSize, Gamma, m_device);
            }

            if (isFinished)
            {
                break;
            }

            currentState = nextState;
        }

        int episodesDone = episode + 1;

        if (isTraining)
        {
            // The training loss is recorded (and graphed, if a graph is attached)
            // once the episode index exceeds the batch size.
            if (episode > BatchSize)
            {
                m_allRewards.Add((float)agent.GetTrainingLoss());
                if (graphScript != null)
                {
                    graphScript.ShowGraph(m_allRewards);
                }
            }

            if (episodesDone % EpisodesPerTransfer == 0)
            {
                agent.TransferLearning(m_device);
            }
        }

        rewardQueue.Enqueue(episodeReward);

        // Every PrintInterval episodes, report the average of the queued rewards and empty the queue.
        if (episodesDone % PrintInterval == 0)
        {
            float rewardAvg = 0.0f;
            foreach (float queuedReward in rewardQueue)
            {
                rewardAvg += queuedReward;
            }
            rewardQueue.Clear();
            rewardAvg /= PrintInterval;

            print("Reward: " + episodesDone + " " + rewardAvg + " " + isTraining);
        }

        // Hand control back once per episode.
        yield return null;
    }

    // Mark this coroutine as finished.
    m_currentCoroutine = null;
    m_coroutineCount++;
}
/// <summary>
/// Runs a checkers session: creates two agents, trains them against each other (or loads
/// them from the input file), optionally writes the first agent to the output file, and
/// then lets the first agent (black) play against a human (white) on the console until
/// one side wins.
/// </summary>
public void Run()
{
    if (!quiet)
    {
        Console.WriteLine("RL Checkers");
    }

    CheckerState checkerState = new CheckerState();
    Agent playerOne = new Agent();
    Agent playerTwo = new Agent();

    // Wire player behaviors to state events
    CheckerState.WireEvents(checkerState, playerOne, playerTwo);

    // Either load previously trained agents or train them now
    if (useInFile)
    {
        Utilities.ReadInFile(inFile, playerOne, playerTwo);
    }
    else
    {
        CheckerState.TrainZeroSum(training, !quiet, playerOne, playerTwo);
    }

    if (useOutFile)
    {
        Utilities.WriteOutFile(outFile, playerOne);
    }

    // Track victory and defeat from the human (player two, white) point of view
    bool playerTwoVictory = false;
    bool playerTwoDefeat = false;
    Action<object, EventArgs> declareVictory = (object sender, EventArgs e) => playerTwoVictory = true;
    Action<object, EventArgs> declareDefeat = (object sender, EventArgs e) => playerTwoDefeat = true;
    CheckerState.WhiteWins += declareVictory;
    CheckerState.BlackWins += declareDefeat;

    try
    {
        // Player competes with computer
        while (!playerTwoVictory && !playerTwoDefeat)
        {
            // Computer moves first; GoalTest is what lets the win events fire.
            checkerState = playerOne.Act(checkerState.SuccessorsBlack) as CheckerState;
            checkerState.GoalTest();

            if (playerTwoVictory || playerTwoDefeat)
            {
                break;
            }

            // Display move options and prompt until a listed move is chosen
            List<IState> options = checkerState.SuccessorsWhite;
            int moveIndex = 0;
            bool validMove = false;

            while (!validMove)
            {
                try
                {
                    CheckerState.PrintOptions(options);
                    Console.Write("Please select a move: ");

                    string input = Console.ReadLine();
                    if (input == null)
                    {
                        // End of input: re-prompting would loop forever, so stop the game.
                        Console.WriteLine();
                        Console.WriteLine("Input closed; ending game.");
                        return;
                    }

                    // Non-numeric input is re-prompted instead of crashing on a FormatException.
                    int selection;
                    if (!Int32.TryParse(input, out selection))
                    {
                        Console.WriteLine("Invalid input; please select from the options listed.");
                        continue;
                    }

                    // Options are listed 1-based
                    moveIndex = selection - 1;

                    // If move is not possible, throw exception
                    if (moveIndex < 0 || moveIndex >= options.Count)
                    {
                        throw new InvalidMoveException(moveIndex + 1);
                    }

                    validMove = true;
                }
                catch (InvalidMoveException e)
                {
                    Console.Write(e.Message);
                    Console.WriteLine("; please select from the options listed.");
                }
            }

            // Apply the human's move
            checkerState = options[moveIndex] as CheckerState;
            checkerState.GoalTest();
        }

        // Announce the outcome
        if (playerTwoVictory)
        {
            Console.WriteLine("Human player wins!");
        }

        if (playerTwoDefeat)
        {
            Console.WriteLine("Computer wins!");
        }
    }
    finally
    {
        // The win events are static; detach so handlers do not pile up across runs.
        CheckerState.WhiteWins -= declareVictory;
        CheckerState.BlackWins -= declareDefeat;
    }
}
/// <summary>
/// Runs a tic-tac-toe session: creates two agents, trains them against each other (or
/// loads them from the input file), optionally writes the first agent to the output file,
/// and then lets the first agent (X) play against a human (O) on the console until the
/// game is won, lost or drawn.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The human is to move, no draw has been declared, and there are no moves to choose from.
/// </exception>
public void Run()
{
    if (!quiet)
    {
        Console.WriteLine("Tic Tac Toe Machine Learning Demonstration");
    }

    // Create starting state
    TicTacToeState tttState = new TicTacToeState();

    // Create players one and two
    Agent playerOne = new Agent();
    Agent playerTwo = new Agent();

    // Wire player behaviors to state events
    TicTacToeState.WireEvents(tttState, playerOne, playerTwo);

    // Either load previously trained agents or train them now
    if (useInFile)
    {
        Utilities.ReadInFile(inFile, playerOne, playerTwo);
    }
    else
    {
        TicTacToeState.TrainZeroSum(training, !quiet, playerOne, playerTwo);
    }

    if (useOutFile)
    {
        Utilities.WriteOutFile(outFile, playerOne);
    }

    // Track victory, defeat and cat's game from the human (player two, O) point of view
    bool playerTwoVictory = false;
    bool playerTwoDefeat = false;
    bool playerTwoDraw = false;
    Action<object, EventArgs> declareVictory = (object sender, EventArgs e) => playerTwoVictory = true;
    Action<object, EventArgs> declareDefeat = (object sender, EventArgs e) => playerTwoDefeat = true;
    Action<object, EventArgs> declareDraw = (object sender, EventArgs e) => playerTwoDraw = true;
    TicTacToeState.PlayerWinsO += declareVictory;
    TicTacToeState.PlayerWinsX += declareDefeat;
    TicTacToeState.CatsGame += declareDraw;

    try
    {
        // Player competes with computer
        while (!playerTwoVictory && !playerTwoDraw && !playerTwoDefeat)
        {
            tttState = playerOne.Act(tttState.SuccessorsX) as TicTacToeState;

            // No need to GoalTest if the game was already declared a draw
            if (!playerTwoDraw)
            {
                tttState.GoalTest();
            }

            if (playerTwoVictory || playerTwoDraw || playerTwoDefeat)
            {
                break;
            }

            List<IState> options = tttState.SuccessorsO;

            // Re-check the draw flag after reading the successors, as the original code did.
            // NOTE(review): presumably reading the successors can raise CatsGame - confirm.
            if (playerTwoDraw)
            {
                break;
            }

            if (options.Count == 0)
            {
                // Without any option no input could ever be valid; fail instead of prompting forever.
                throw new InvalidOperationException("No moves are available for the human player.");
            }

            // Prompt until the user picks one of the listed moves
            int moveIndex = -1;
            while (moveIndex < 0)
            {
                TicTacToeState.PrintOptions(options);
                Console.Write("Please select a move: ");

                string input = Console.ReadLine();
                if (input == null)
                {
                    // End of input: re-prompting would loop forever, so stop the game.
                    Console.WriteLine();
                    Console.WriteLine("Input closed; ending game.");
                    return;
                }

                // Options are listed 1-based. Non-numeric or out-of-range input is
                // re-prompted instead of crashing in Parse or in the list indexer.
                int selection;
                if (Int32.TryParse(input, out selection) && selection >= 1 && selection <= options.Count)
                {
                    moveIndex = selection - 1;
                }
                else
                {
                    Console.WriteLine("Invalid move; please select from the options listed.");
                }
            }

            // Apply the human's move
            tttState = options[moveIndex] as TicTacToeState;
            tttState.GoalTest();
        }

        // Announce the outcome
        if (playerTwoVictory)
        {
            Console.WriteLine("Human player wins!");
        }

        if (playerTwoDefeat)
        {
            Console.WriteLine("Computer wins!");
        }

        if (playerTwoDraw)
        {
            Console.WriteLine("Cat's game!");
        }
    }
    finally
    {
        // The outcome events are static; detach so handlers do not pile up across runs.
        TicTacToeState.PlayerWinsO -= declareVictory;
        TicTacToeState.PlayerWinsX -= declareDefeat;
        TicTacToeState.CatsGame -= declareDraw;
    }
}