/// <summary>
/// Marks the learner as trained, splits the recorded boards into two
/// alternating sequences and passes both to the function calibrator.
/// </summary>
/// <param name="history">
/// Recorded boards of one game. The last board (index Count-1) and every
/// second board before it form the "winner" sequence; the remaining boards
/// form the "looser" sequence.
/// </param>
public override void Learn(GameHistory history)
{
    m_Trained = true;

    int total = history.Count();
    int looserCount = total / 2;

    // Boards at Count-2, Count-4, ... Walking down by two from either end
    // visits positions whose half (integer division) is exactly the slot
    // in the per-side array, so the original ordering is kept.
    BoardRepresentation[] looserBoards = new BoardRepresentation[looserCount];
    int pos = total - 2;
    while (pos >= 0)
    {
        looserBoards[pos / 2] = history.Peek(pos);
        pos -= 2;
    }

    // Boards at Count-1, Count-3, ...; this side gets the extra board when
    // the history length is odd.
    BoardRepresentation[] winnerBoards = new BoardRepresentation[total - looserCount];
    pos = total - 1;
    while (pos >= 0)
    {
        winnerBoards[pos / 2] = history.Peek(pos);
        pos -= 2;
    }

    m_FuncCallibrator.Callibrate(winnerBoards, looserBoards);
}
/// <summary>
/// Reverts the most recent entry of <c>GameHistory</c>, if there is one.
/// </summary>
/// <remarks>
/// A history entry is read as six integers: elements 0 and 1 index a board
/// cell that is set to element 2, and elements 3 and 4 index a second cell
/// that is set to element 5 (presumably the cells' contents before the move
/// was made - confirm against the code that records moves). The entry is
/// then moved onto <c>RedoStack</c>, the player on turn is swapped, and the
/// game is flagged as in progress again.
/// </remarks>
public void UndoMove()
{
    // Nothing recorded, nothing to undo.
    if (GameHistory.Count == 0)
    {
        return;
    }

    List<int> lastMove = GameHistory.Peek();

    // Write both affected cells back before the entry leaves the history.
    GameBoard.Board[lastMove[0], lastMove[1]] = lastMove[2];
    GameBoard.Board[lastMove[3], lastMove[4]] = lastMove[5];

    RedoStack.Push(GameHistory.Pop());
    PlayerOnTurn = PlayerSwap();

    // Undoing a move re-opens a game that had already ended.
    if (GameInProgress == false)
    {
        GameInProgress = true;
    }
}
/// <summary>
/// Trains on one finished game by walking each side's boards from the last
/// one back to the first, moving the training target a fraction of the way
/// from the current grade toward the later target at every step.
/// </summary>
/// <remarks>
/// <para>
/// The last recorded board and every second board before it are treated as
/// the winner's boards; the others are the loser's. Before training, the
/// final targets are nudged toward the game result according to
/// <c>history.WinType</c>: 1 rewards output 0 for the winner and no output
/// for the loser, 2 rewards output 1 / output 3, 3 rewards output 2 /
/// output 4. Any other value leaves the graded targets untouched.
/// </para>
/// <para>
/// An empty history is ignored (nothing is trained and the game counter is
/// not advanced). A history with a single board trains only that board;
/// previously both cases failed with an out-of-range array access.
/// </para>
/// <para>
/// Winner and loser training steps are interleaved on purpose: each
/// <c>Grade</c> call is made after the preceding <c>Train</c> calls, exactly
/// as in the original sequence.
/// </para>
/// </remarks>
/// <param name="history">Recorded boards and win type of the finished game.</param>
public override void Learn(GameHistory history)
{
    const float stepSize = 0.1f;
    const int noRewardedOutput = -1;

    int boardCount = history.Count();
    if (boardCount == 0)
    {
        // No boards were recorded, so there is nothing to grade or train.
        return;
    }

    // Loser boards: Count-2, Count-4, ... stored in chronological order.
    BoardRepresentation[] looserBoards = new BoardRepresentation[boardCount / 2];
    int looserIdx = looserBoards.Length;
    for (int i = boardCount - 2; i >= 0; i -= 2)
    {
        looserBoards[--looserIdx] = history.Peek(i);
    }

    // Winner boards: Count-1, Count-3, ... stored in chronological order.
    BoardRepresentation[] winnerBoards = new BoardRepresentation[boardCount - looserBoards.Length];
    int winnerIdx = winnerBoards.Length;
    for (int i = boardCount - 1; i >= 0; i -= 2)
    {
        winnerBoards[--winnerIdx] = history.Peek(i);
    }

    int wIdx = winnerBoards.Length - 1;
    int lIdx = looserBoards.Length - 1;

    float[] wTarget = Grade(winnerBoards[wIdx]);
    // A one-board history has no loser board; a null target is skipped below.
    float[] lTarget = lIdx >= 0 ? Grade(looserBoards[lIdx]) : null;

    if (history.WinType == 1)
    {
        NudgeTowardOutcome(wTarget, 0, stepSize);
        NudgeTowardOutcome(lTarget, noRewardedOutput, stepSize);
    }
    else if (history.WinType == 2)
    {
        NudgeTowardOutcome(wTarget, 1, stepSize);
        NudgeTowardOutcome(lTarget, 3, stepSize);
    }
    else if (history.WinType == 3)
    {
        NudgeTowardOutcome(wTarget, 2, stepSize);
        NudgeTowardOutcome(lTarget, 4, stepSize);
    }

    while (lIdx >= 0 || wIdx >= 0)
    {
        if (wIdx >= 0)
        {
            Train(winnerBoards[wIdx], wTarget);
            if (wIdx > 0)
            {
                // Target for the previous winner board: its current grade
                // pulled one step toward the target just trained.
                BlendIntoEarlierGrade(wTarget, Grade(winnerBoards[wIdx - 1]), stepSize);
            }
            wIdx--;
        }

        if (lIdx >= 0)
        {
            Train(looserBoards[lIdx], lTarget);
            if (lIdx > 0)
            {
                BlendIntoEarlierGrade(lTarget, Grade(looserBoards[lIdx - 1]), stepSize);
            }
            lIdx--;
        }
    }

    m_GamesTrained++;
    if (m_GamesTrained % 100 == 0)
    {
        Save(SAVE_FILE);
    }
}

/// <summary>
/// Moves each of the five outputs in <paramref name="target"/> one step
/// toward 1 for <paramref name="rewardedOutput"/> and toward 0 for all
/// others. Does nothing when <paramref name="target"/> is null.
/// </summary>
private static void NudgeTowardOutcome(float[] target, int rewardedOutput, float step)
{
    const int outputCount = 5;

    if (target == null)
    {
        return;
    }

    for (int k = 0; k < outputCount; k++)
    {
        float ideal = (k == rewardedOutput) ? 1.0f : 0.0f;
        target[k] = target[k] + (step * (ideal - target[k]));
    }
}

/// <summary>
/// Overwrites each of the five outputs in <paramref name="target"/> with
/// <paramref name="earlierGrade"/> moved one step toward the old target.
/// </summary>
private static void BlendIntoEarlierGrade(float[] target, float[] earlierGrade, float step)
{
    const int outputCount = 5;

    for (int k = 0; k < outputCount; k++)
    {
        target[k] = earlierGrade[k] + (step * (target[k] - earlierGrade[k]));
    }
}
/// <summary>
/// Trains on one finished game by walking the recorded boards from the last
/// one back to the first, alternating between the winner's and the loser's
/// target and moving each target a <c>STEP_SIZE</c> fraction from the
/// current grade toward the later target.
/// </summary>
/// <remarks>
/// <para>
/// The last board is treated as the winner's, the one before it as the
/// loser's, and so on alternately. Before training, both final targets are
/// nudged toward the game result according to <c>history.WinType</c>:
/// 1 rewards output 0 for the winner and no output for the loser, 2 rewards
/// output 1 / output 3, 3 rewards output 2 / output 4. Any other value
/// leaves the graded targets untouched.
/// </para>
/// <para>
/// Every <c>Grade</c> and <c>Train</c> call receives a flag that is true
/// only for boards strictly after the first board for which <c>Race</c>
/// returns true. NOTE(review): the first race board itself is therefore
/// passed <c>false</c> - confirm this is intended.
/// </para>
/// <para>
/// An empty history is ignored (nothing is trained and the game counter is
/// not advanced). A history with a single board trains only that board;
/// previously both cases failed with an out-of-range array access.
/// </para>
/// </remarks>
/// <param name="history">Recorded boards and win type of the finished game.</param>
public override void Learn(GameHistory history)
{
    const int noRewardedOutput = -1;

    BoardRepresentation[] boards = new BoardRepresentation[history.Count()];
    if (boards.Length == 0)
    {
        // No boards were recorded, so there is nothing to grade or train.
        return;
    }

    for (int i = boards.Length - 1; i >= 0; i--)
    {
        boards[i] = history.Peek(i);
    }

    int idx = boards.Length - 1;

    // Defaults to the last index so that, when no board is a race, the
    // "index > firstRaceBoardIdx" flag is false for every board.
    int firstRaceBoardIdx = idx;
    for (int i = 0; i < boards.Length; i++)
    {
        if (Race(boards[i]))
        {
            firstRaceBoardIdx = i;
            break;
        }
    }

    float[] wTarget = new float[5];
    float[] lTarget = new float[5];
    Grade(boards[idx], idx > firstRaceBoardIdx, wTarget);
    if (idx > 0)
    {
        // Only grade the loser's last board when the history has one; with
        // a single board lTarget is never passed to Train.
        Grade(boards[idx - 1], (idx - 1) > firstRaceBoardIdx, lTarget);
    }

    if (history.WinType == 1)
    {
        ShiftTowardResult(wTarget, 0, STEP_SIZE);
        ShiftTowardResult(lTarget, noRewardedOutput, STEP_SIZE);
    }
    else if (history.WinType == 2)
    {
        ShiftTowardResult(wTarget, 1, STEP_SIZE);
        ShiftTowardResult(lTarget, 3, STEP_SIZE);
    }
    else if (history.WinType == 3)
    {
        ShiftTowardResult(wTarget, 2, STEP_SIZE);
        ShiftTowardResult(lTarget, 4, STEP_SIZE);
    }

    bool isCurrentBoardWinner = true;
    float[] prevWState = new float[5];
    float[] prevLState = new float[5];

    while (idx >= 0)
    {
        float[] target = isCurrentBoardWinner ? wTarget : lTarget;
        float[] prevState = isCurrentBoardWinner ? prevWState : prevLState;

        Train(boards[idx], idx > firstRaceBoardIdx, target);
        idx--;

        // After the decrement idx is the other side's board, so idx - 1 is
        // the same side's previous board; it exists only while idx > 0.
        if (idx > 0)
        {
            Grade(boards[idx - 1], idx - 1 > firstRaceBoardIdx, prevState);
            DiscountTowardEarlier(target, prevState, STEP_SIZE);
        }

        isCurrentBoardWinner = !isCurrentBoardWinner;
    }

    m_GamesTrained++;
    if (m_GamesTrained % 100 == 0)
    {
        Save(SAVE_FILE);
    }
}

/// <summary>
/// Moves each of the five outputs in <paramref name="target"/> one step
/// toward 1 for <paramref name="rewardedOutput"/> and toward 0 for all
/// others.
/// </summary>
private static void ShiftTowardResult(float[] target, int rewardedOutput, float step)
{
    const int outputCount = 5;

    for (int k = 0; k < outputCount; k++)
    {
        float ideal = (k == rewardedOutput) ? 1.0f : 0.0f;
        target[k] = target[k] + (step * (ideal - target[k]));
    }
}

/// <summary>
/// Overwrites each of the five outputs in <paramref name="target"/> with
/// <paramref name="earlierGrade"/> moved one step toward the old target.
/// </summary>
private static void DiscountTowardEarlier(float[] target, float[] earlierGrade, float step)
{
    const int outputCount = 5;

    for (int k = 0; k < outputCount; k++)
    {
        target[k] = earlierGrade[k] + (step * (target[k] - earlierGrade[k]));
    }
}