/// <summary>
/// Builds the interface's working objects (board, dice cup, move buffer, cube,
/// undo stack and game history) and then starts a new game.
/// </summary>
/// <param name="nackgammon">Forwarded to the <c>BgBoard</c> constructor and to <c>NewGame</c>.</param>
/// <param name="maxFivePiecesOnPoint">Forwarded to <c>NewGame</c>.</param>
public GammonInterface( bool nackgammon, bool maxFivePiecesOnPoint )
{
    // Game-state containers.
    m_Board = new BgBoard( nackgammon );
    m_BgDiceCup = new BgDiceCup();

    // Fixed four-slot buffer for moves.
    m_Moves = new int[4];

    m_Cube = new Cube();

    // Bookkeeping for undo and for the record of the game.
    m_UndoStack = new UndoStack();
    m_History = new GameHistory();

    // All members exist at this point; hand over to the regular new-game path.
    NewGame( nackgammon, maxFivePiecesOnPoint );
}
/// <summary>
/// Creates a new interface instance: allocates the board, dice cup, a
/// four-element move array, the cube, the undo stack and the game history,
/// and finally calls <c>NewGame</c> with the same two options.
/// </summary>
/// <param name="nackgammon">Passed to the <c>BgBoard</c> constructor and to <c>NewGame</c>.</param>
/// <param name="maxFivePiecesOnPoint">Passed to <c>NewGame</c>.</param>
public GammonInterface(bool nackgammon, bool maxFivePiecesOnPoint)
{
    m_Board = new BgBoard(nackgammon);
    m_BgDiceCup = new BgDiceCup();
    m_Moves = new int[4];
    m_Cube = new Cube();
    m_UndoStack = new UndoStack();
    m_History = new GameHistory();

    // Members are fully constructed before the new-game setup runs.
    NewGame(nackgammon, maxFivePiecesOnPoint);
}
/// <summary>
/// Splits the recorded boards of a game into two alternating sequences and
/// passes them to the function calibrator.
/// </summary>
/// <remarks>
/// The last recorded board (index <c>Count() - 1</c>) and every second board
/// before it go into the "winner" array; the boards in between go into the
/// "loser" array. Both arrays keep the boards in history order.
/// </remarks>
/// <param name="history">Recorded boards of the game to learn from.</param>
public override void Learn( GameHistory history )
{
    m_Trained = true;

    int total = history.Count();
    int loserCount = total / 2;
    int winnerCount = total - loserCount;

    // Loser boards: second-to-last board, then every other one going backwards.
    BoardRepresentation[] loserBoards = new BoardRepresentation[loserCount];
    int slot = loserCount;
    int ply = total - 2;
    while ( ply >= 0 )
    {
        slot--;
        loserBoards[slot] = history.Peek( ply );
        ply -= 2;
    }

    // Winner boards: last board, then every other one going backwards.
    BoardRepresentation[] winnerBoards = new BoardRepresentation[winnerCount];
    slot = winnerCount;
    ply = total - 1;
    while ( ply >= 0 )
    {
        slot--;
        winnerBoards[slot] = history.Peek( ply );
        ply -= 2;
    }

    m_FuncCallibrator.Callibrate( winnerBoards, loserBoards );
}
/// <summary>
/// Trains the evaluator from one finished game by walking each side's boards
/// backwards from the final position.
/// </summary>
/// <remarks>
/// The last recorded board and every second board before it are treated as the
/// winner's; the boards in between are the loser's. For each side the target
/// starts from <c>Grade</c> of its final board, is moved a fixed fraction
/// toward the outcome indicated by <c>history.WinType</c> (1, 2 or 3; any other
/// value leaves the graded values untouched), and is then blended with the
/// grade of each earlier board before that board is trained.
/// Every 100th trained game the state is written to <c>SAVE_FILE</c>.
/// A history with fewer than two boards has no winner/loser pair; it is
/// ignored and not counted as a trained game.
/// </remarks>
/// <param name="history">Recorded boards and win type of the finished game.</param>
public override void Learn( GameHistory history )
{
    // Number of graded values this method adjusts (indices 0..4).
    const int outputCount = 5;
    // Fraction of the remaining distance covered by each update.
    const float stepSize = 0.1f;

    int boardCount = history.Count();

    // With fewer than two boards one of the arrays below would be empty and
    // indexing its "last" element would throw IndexOutOfRangeException.
    if ( boardCount < 2 )
    {
        return;
    }

    BoardRepresentation[] looserBoards = new BoardRepresentation[boardCount / 2];
    int looserIdx = looserBoards.Length;
    for ( int i = boardCount - 2; i >= 0; i -= 2 )
    {
        looserBoards[--looserIdx] = history.Peek( i );
    }

    BoardRepresentation[] winnerBoards = new BoardRepresentation[boardCount - looserBoards.Length];
    int winnerIdx = winnerBoards.Length;
    for ( int i = boardCount - 1; i >= 0; i -= 2 )
    {
        winnerBoards[--winnerIdx] = history.Peek( i );
    }

    int wIdx = winnerBoards.Length - 1;
    int lIdx = looserBoards.Length - 1;

    float[] wTarget = Grade( winnerBoards[wIdx] );
    float[] lTarget = Grade( looserBoards[lIdx] );

    // Which output (if any) is pulled toward 1.0 for each side; every other
    // output is pulled toward 0.0. -1 means "no output goes toward 1.0".
    int winnerGoalIdx = -1;
    int looserGoalIdx = -1;
    bool knownWinType = true;
    if ( history.WinType == 1 )
    {
        winnerGoalIdx = 0;
    }
    else if ( history.WinType == 2 )
    {
        winnerGoalIdx = 1;
        looserGoalIdx = 3;
    }
    else if ( history.WinType == 3 )
    {
        winnerGoalIdx = 2;
        looserGoalIdx = 4;
    }
    else
    {
        knownWinType = false;
    }

    if ( knownWinType )
    {
        // Winner values are all updated before any loser value, as before.
        for ( int k = 0; k < outputCount; k++ )
        {
            float goal = ( k == winnerGoalIdx ) ? 1.0f : 0.0f;
            wTarget[k] = wTarget[k] + ( stepSize * ( goal - wTarget[k] ) );
        }
        for ( int k = 0; k < outputCount; k++ )
        {
            float goal = ( k == looserGoalIdx ) ? 1.0f : 0.0f;
            lTarget[k] = lTarget[k] + ( stepSize * ( goal - lTarget[k] ) );
        }
    }

    while ( lIdx >= 0 || wIdx >= 0 )
    {
        if ( wIdx >= 0 )
        {
            Train( winnerBoards[wIdx], wTarget );
            if ( wIdx > 0 )
            {
                // Next target = previous board's grade moved a step toward the current target.
                float[] prevWState = Grade( winnerBoards[wIdx - 1] );
                for ( int k = 0; k < outputCount; k++ )
                {
                    wTarget[k] = prevWState[k] + ( stepSize * ( wTarget[k] - prevWState[k] ) );
                }
            }
            wIdx--;
        }

        if ( lIdx >= 0 )
        {
            Train( looserBoards[lIdx], lTarget );
            if ( lIdx > 0 )
            {
                float[] prevLState = Grade( looserBoards[lIdx - 1] );
                for ( int k = 0; k < outputCount; k++ )
                {
                    lTarget[k] = prevLState[k] + ( stepSize * ( lTarget[k] - prevLState[k] ) );
                }
            }
            lIdx--;
        }
    }

    m_GamesTrained++;
    if ( m_GamesTrained % 100 == 0 )
    {
        Save( SAVE_FILE );
    }
}
/// <summary>
/// Trains the evaluator from one finished game by walking the recorded boards
/// from last to first, alternating between the winner's and the loser's target.
/// </summary>
/// <remarks>
/// The last board is treated as the winner's and the one before it as the
/// loser's. Each side's target starts from <c>Grade</c> of its final board, is
/// moved by <c>STEP_SIZE</c> toward the outcome indicated by
/// <c>history.WinType</c> (1, 2 or 3; any other value leaves the graded values
/// untouched), and is then blended with the grade of that side's previous
/// board after each training step.
/// The boolean passed to <c>Grade</c>/<c>Train</c> is true only for boards that
/// come after the first board for which <c>Race</c> returns true.
/// Every 100th trained game the state is written to <c>SAVE_FILE</c>.
/// A history with fewer than two boards has no winner/loser pair; it is
/// ignored and not counted as a trained game.
/// </remarks>
/// <param name="history">Recorded boards and win type of the finished game.</param>
public override void Learn( GameHistory history )
{
    // Number of graded values per board (indices 0..4).
    const int outputCount = 5;

    int boardCount = history.Count();

    // boards[idx] and boards[idx-1] are both read below; with fewer than two
    // boards that would index out of range.
    if ( boardCount < 2 )
    {
        return;
    }

    BoardRepresentation[] boards = new BoardRepresentation[boardCount];
    for ( int i = boards.Length - 1; i >= 0; i-- )
    {
        boards[i] = history.Peek( i );
    }

    int idx = boards.Length - 1;

    // Index of the first board that Race() accepts; stays at the last index
    // when none does, so that no board compares as "after" it.
    // NOTE(review): the first race board itself is passed 'false' because the
    // comparison is strict (>) — confirm this is intended.
    int firstRaceBoardIdx = idx;
    for ( int i = 0; i < boards.Length; i++ )
    {
        if ( Race( boards[i] ) )
        {
            firstRaceBoardIdx = i;
            break;
        }
    }

    float[] wTarget = new float[outputCount];
    float[] lTarget = new float[outputCount];
    Grade( boards[idx], idx > firstRaceBoardIdx, wTarget );
    Grade( boards[idx - 1], ( idx - 1 ) > firstRaceBoardIdx, lTarget );

    // Which output (if any) is pulled toward 1.0 for each side; every other
    // output is pulled toward 0.0. -1 means "no output goes toward 1.0".
    int winnerGoalIdx = -1;
    int looserGoalIdx = -1;
    bool knownWinType = true;
    if ( history.WinType == 1 )
    {
        winnerGoalIdx = 0;
    }
    else if ( history.WinType == 2 )
    {
        winnerGoalIdx = 1;
        looserGoalIdx = 3;
    }
    else if ( history.WinType == 3 )
    {
        winnerGoalIdx = 2;
        looserGoalIdx = 4;
    }
    else
    {
        knownWinType = false;
    }

    if ( knownWinType )
    {
        for ( int k = 0; k < outputCount; k++ )
        {
            float goal = ( k == winnerGoalIdx ) ? 1.0f : 0.0f;
            wTarget[k] = wTarget[k] + ( STEP_SIZE * ( goal - wTarget[k] ) );
        }
        for ( int k = 0; k < outputCount; k++ )
        {
            float goal = ( k == looserGoalIdx ) ? 1.0f : 0.0f;
            lTarget[k] = lTarget[k] + ( STEP_SIZE * ( goal - lTarget[k] ) );
        }
    }

    bool isCurrentBoardWinner = true;
    float[] prevWState = new float[outputCount];
    float[] prevLState = new float[outputCount];

    while ( idx >= 0 )
    {
        // Pick the target and scratch buffer belonging to the side whose board this is.
        float[] target = isCurrentBoardWinner ? wTarget : lTarget;
        float[] prevState = isCurrentBoardWinner ? prevWState : prevLState;

        Train( boards[idx], idx > firstRaceBoardIdx, target );
        idx--;

        // After the decrement, boards[idx-1] is the same side's previous board.
        if ( idx > 0 )
        {
            Grade( boards[idx - 1], idx - 1 > firstRaceBoardIdx, prevState );
            for ( int k = 0; k < outputCount; k++ )
            {
                target[k] = prevState[k] + ( STEP_SIZE * ( target[k] - prevState[k] ) );
            }
        }

        isCurrentBoardWinner = !isCurrentBoardWinner;
    }

    m_GamesTrained++;
    if ( m_GamesTrained % 100 == 0 )
    {
        Save( SAVE_FILE );
    }
}
/// <summary>
/// No-op override: the supplied game history is not read and no state is changed.
/// </summary>
/// <param name="history">Unused.</param>
public override void Learn( GameHistory history )
{
}