/// <summary>
/// Updates the stored Q-value of <paramref name="observedPlay"/> for the given agent using
/// Q = Q + LearningRate * (reward + DiscountFactor * bestNextQ - Q).
/// </summary>
/// <remarks>
/// The reward is read from <c>GameManager.I.LearningSession.Reward(owner)</c>. The best next value is
/// computed by <c>CheckBestQValueAtGameState</c> from the owners array returned by
/// <c>GameManager.I.GetCellsOwner(GameManager.I.Cells)</c>. If the table has no entry for
/// <paramref name="observedPlay"/> yet, one is created with value 0 before the update is applied.
/// </remarks>
/// <param name="observedPlay">The state/action entry whose Q-value is updated.</param>
/// <param name="owner">
/// Selects the table: <c>Cell.CellOwner.Agent1</c> uses <c>QDictionaryAgent1</c>, any other value
/// uses <c>QDictionaryAgent2</c>.
/// </param>
public void UpdateQValue(GameState observedPlay, Cell.CellOwner owner)
{
    int reward = GameManager.I.LearningSession.Reward(owner);

    // Both agents follow the same update rule; only the backing table differs.
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    float currentQ;
    if (!qTable.TryGetValue(observedPlay, out currentQ))
    {
        // Unseen entry: register it with a neutral value before looking at the next state.
        currentQ = 0;
        qTable[observedPlay] = 0;
    }

    float bestNextQ = CheckBestQValueAtGameState(GameManager.I.GetCellsOwner(GameManager.I.Cells), owner);

    qTable[observedPlay] = currentQ + LearningRate * (reward + DiscountFactor * bestNextQ - currentQ);
}
/// <summary>
/// Returns the largest Q-value stored for the nine <c>GameState</c> instances built from
/// <paramref name="owners"/> combined with the indices 0 through 8.
/// </summary>
/// <remarks>
/// Entries that are not present in the table are skipped, and no validity check is performed on the
/// candidates. The running maximum starts at 0, so the result is never below 0 even if every stored
/// value is negative.
/// </remarks>
/// <param name="owners">Owners array passed to the <c>GameState</c> constructor for each candidate.</param>
/// <param name="owner">
/// Selects the table: <c>Cell.CellOwner.Agent1</c> uses <c>QDictionaryAgent1</c>, any other value
/// uses <c>QDictionaryAgent2</c>.
/// </param>
/// <returns>The highest stored Q-value among the candidates, or 0 if none exceeds 0.</returns>
private float CheckBestQValueAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
{
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    float highestQ = 0;
    for (int index = 0; index < 9; index++)
    {
        GameState candidate = new GameState(owners, index);

        float storedQ;
        if (qTable.TryGetValue(candidate, out storedQ) && storedQ > highestQ)
        {
            highestQ = storedQ;
        }
    }

    return highestQ;
}
/// <summary>
/// Chooses an action for the given agent among the nine <c>GameState</c> candidates built from
/// <paramref name="owners"/> combined with the indices 0 through 8.
/// </summary>
/// <remarks>
/// Only candidates accepted by <c>IsValidAction</c> are considered. A valid candidate that has no
/// entry in the agent's table is inserted with value 0 as a side effect. The candidate with the
/// highest non-zero stored Q-value is returned; if there is none, one of the zero-valued candidates
/// is picked at random.
/// </remarks>
/// <param name="owners">Owners array passed to the <c>GameState</c> constructor for each candidate.</param>
/// <param name="owner">
/// Selects the table: <c>Cell.CellOwner.Agent1</c> uses <c>QDictionaryAgent1</c>, any other value
/// uses <c>QDictionaryAgent2</c>.
/// </param>
/// <returns>The selected candidate, or <c>null</c> when no candidate is a valid action.</returns>
public GameState CheckBestActionAtGameState(Cell.CellOwner[] owners, Cell.CellOwner owner)
{
    // Both agents use the same selection logic; only the backing table differs.
    var qTable = owner == Cell.CellOwner.Agent1 ? QDictionaryAgent1 : QDictionaryAgent2;

    GameState bestAction = null;
    float bestQ = float.NegativeInfinity;
    List<GameState> zeroValuedActions = new List<GameState>();

    for (int index = 0; index < 9; index++)
    {
        GameState candidate = new GameState(owners, index);
        if (!IsValidAction(candidate))
        {
            continue;
        }

        float storedQ;
        if (!qTable.TryGetValue(candidate, out storedQ))
        {
            // First time this action is seen: register it with a neutral value.
            qTable[candidate] = 0;
            zeroValuedActions.Add(candidate);
        }
        else if (storedQ == 0)
        {
            zeroValuedActions.Add(candidate);
        }
        else if (storedQ > bestQ)
        {
            // NOTE(review): a non-zero Q-value wins even when it is negative and zero-valued
            // candidates exist; confirm that this precedence is intended.
            bestQ = storedQ;
            bestAction = candidate;
        }
    }

    if (bestAction == null && zeroValuedActions.Count > 0)
    {
        // The integer overload of Random.Range excludes its upper bound, so the bound must be
        // Count (not Count - 1) for the last candidate to be selectable.
        return zeroValuedActions[Random.Range(0, zeroValuedActions.Count)];
    }

    return bestAction;
}