/// <summary>
/// Applies one Q-learning update to the stored value of <paramref name="observedPlay"/>
/// in the Q-table that belongs to <paramref name="owner"/>.
/// </summary>
/// <param name="observedPlay">Entry whose Q-value is updated; it is first inserted with value 0 when missing.</param>
/// <param name="owner">Agent1 selects QDictionaryAgent1; any other value selects QDictionaryAgent2.</param>
public void UpdateQValue(GameState observedPlay, Cell.CellOwner owner)
{
    int reward = GameManager.I.LearningSession.Reward(owner);

    // Both agents use the same update rule; only the backing table differs.
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    if (!qTable.ContainsKey(observedPlay))
    {
        qTable[observedPlay] = 0;
    }

    float currentQ = qTable[observedPlay];

    // Best stored value reachable from the board as it stands right now.
    float bestNextQ = CheckBestQValueAtGameState(GameManager.I.GetCellsOwner(GameManager.I.Cells), owner);

    // Q <- Q + LearningRate * (reward + DiscountFactor * bestNext - Q)
    float updatedQ = currentQ + LearningRate * (reward + DiscountFactor * bestNextQ - currentQ);
    qTable[observedPlay] = updatedQ;
}
/// <summary>
/// Returns the largest stored Q-value among the entries <c>new GameState(owners, i)</c>
/// for i in 0..8, looked up in the table of the given agent.
/// </summary>
/// <param name="owners">Board ownership array passed to each candidate <c>GameState</c>.</param>
/// <param name="owner">Agent1 selects QDictionaryAgent1; any other value selects QDictionaryAgent2.</param>
/// <returns>
/// The highest stored value that is greater than 0, or 0 when no entry exceeds it
/// (missing entries and non-positive values never lower the result below 0).
/// </returns>
private float CheckBestQValueAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
{
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    // The running maximum starts at 0, so only strictly positive entries can replace it.
    float bestQ = 0;

    for (int slot = 0; slot < 9; slot++)
    {
        GameState candidate = new GameState(owners, slot);

        float storedQ;
        if (qTable.TryGetValue(candidate, out storedQ) && storedQ > bestQ)
        {
            bestQ = storedQ;
        }
    }

    return bestQ;
}
/// <summary>
/// Builds an array with the owner of every cell in <paramref name="cells"/>.
/// </summary>
/// <remarks>
/// While <c>Brains[0].IsUsingFileData</c> is set, any cell owned by <c>Player</c> is reassigned to an
/// agent: Agent2 when <c>Brains[0].isFirstAgainstPlayer</c> is true, Agent1 otherwise. The reassignment
/// is written to <c>cell.owner</c> itself, not only to the returned array (so the board cell changes
/// too if <c>Cell</c> is a reference type).
/// </remarks>
/// <param name="cells">Cells to read; the result always has 9 slots, so at most 9 cells are supported.</param>
/// <returns>A 9-element array whose entry at each index is the (possibly substituted) owner of that cell.</returns>
public Cell.CellOwner[] GetCellsOwner(Cell[] cells)
{
    Cell.CellOwner[] result = new Cell.CellOwner[9];

    for (int i = 0; i < cells.Length; i++)
    {
        var current = cells[i];

        // Human moves are re-labelled as the agent the brain is not playing as.
        if (Brains[0].IsUsingFileData && current.owner == Cell.CellOwner.Player)
        {
            current.owner = Brains[0].isFirstAgainstPlayer
                ? Cell.CellOwner.Agent2
                : Cell.CellOwner.Agent1;
        }

        result[i] = current.owner;
    }

    return result;
}
/// <summary>
/// Resets the per-game state, has the board regenerate itself, and starts the game loop coroutine.
/// </summary>
private void InitGame()
{
    // Clear the finished flag and start from an empty 9-cell array before the board is generated.
    IsGameFinished = false;
    Cells = new Cell[9];

    _board.GenerateBoard();

    // No winner yet; enter the starting phase and hand control to the loop.
    Winner = Cell.CellOwner.None;
    GameState = GamePhase.GameStart;

    StartCoroutine(GameLoop());
}
/// <summary>
/// Reports whether the current game is over, either because some cell is part of a winning
/// combination or because every cell is owned.
/// </summary>
/// <remarks>
/// Side effect: when a winning combination is found, <c>Winner</c> is set to its owner.
/// <c>Winner</c> is left untouched when the game ends with a full board and no winning combination.
/// </remarks>
/// <returns>True when a winner exists or the board is full; false otherwise.</returns>
public bool IsGameEnded()
{
    // Look for a winner BEFORE the full-board check: a winning move can also be the one that
    // fills the last cell, and returning early on the full board would leave Winner unset,
    // making that win indistinguishable from a draw.
    foreach (var cell in Cells)
    {
        Cell.WinnerData winData = cell.IsAWinnerCombination();
        if (winData.IsWinner)
        {
            Winner = winData.Owner;
            return true;
        }
    }

    // No winning combination: the game is over only if there is nothing left to play.
    return CheckIfAllOwned();
}
/// <summary>
/// Computes the reward for <paramref name="owner"/> given the current state of the game.
/// </summary>
/// <param name="owner">The agent the reward is computed for.</param>
/// <returns>
/// 0 while <c>GameManager.I.IsGameEnded()</c> is false; 1 when the winner is the same agent as
/// <paramref name="owner"/>; -1 when the winner is the other agent; 0 in every other case
/// (for example a draw).
/// </returns>
public int Reward(Cell.CellOwner owner)
{
    if (!GameManager.I.IsGameEnded())
    {
        return 0;
    }

    Cell.CellOwner winner = GameManager.I.Winner;

    bool winnerIsAgent = winner == Cell.CellOwner.Agent1 || winner == Cell.CellOwner.Agent2;
    bool ownerIsAgent = owner == Cell.CellOwner.Agent1 || owner == Cell.CellOwner.Agent2;

    // A draw (or any case not involving two agents) yields 0.
    if (!winnerIsAgent || !ownerIsAgent)
    {
        return 0;
    }

    return winner == owner ? 1 : -1;
}
/// <summary>
/// Picks the action with the highest stored Q-value among the valid candidates
/// <c>new GameState(owners, i)</c> for i in 0..8, using the table of the given agent.
/// </summary>
/// <remarks>
/// Side effect: every valid candidate missing from the table is inserted with value 0.
/// Candidates whose value is exactly 0 are treated as ties and one of them is chosen at random
/// whenever no candidate has a positive value.
/// </remarks>
/// <param name="owners">Board ownership array passed to each candidate <c>GameState</c>.</param>
/// <param name="owner">Agent1 selects QDictionaryAgent1; any other value selects QDictionaryAgent2.</param>
/// <returns>The chosen candidate, or null when no candidate passes <c>IsValidAction</c>.</returns>
public GameState CheckBestActionAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
{
    // Both agents are searched the same way; only the backing table differs.
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    GameState bestAction = null;
    float bestQ = float.NegativeInfinity;
    List<GameState> zeroValuedActions = new List<GameState>();

    for (int i = 0; i < 9; i++)
    {
        GameState candidate = new GameState(owners, i);
        if (!IsValidAction(candidate))
        {
            continue;
        }

        float storedQ;
        if (!qTable.TryGetValue(candidate, out storedQ))
        {
            // First time this candidate is seen: register it with the default value.
            qTable[candidate] = 0;
            zeroValuedActions.Add(candidate);
        }
        else if (storedQ == 0)
        {
            // Exact comparison is intentional: 0 is the value given to unexplored entries.
            zeroValuedActions.Add(candidate);
        }
        else if (storedQ > bestQ)
        {
            bestQ = storedQ;
            bestAction = candidate;
        }
    }

    // A zero-valued action beats every negative one, so fall back to the ties not only when
    // nothing else was found but also when the best non-zero value is below 0.
    bool zeroIsBest = bestAction == null || bestQ < 0;
    if (zeroIsBest && zeroValuedActions.Count > 0)
    {
        // The integer overload of Random.Range excludes its upper bound, so passing Count
        // (not Count - 1) is what makes the last candidate selectable.
        return zeroValuedActions[Random.Range(0, zeroValuedActions.Count)];
    }

    return bestAction;
}