/// <summary>
/// Plays uniformly random moves on a copy of <c>state</c> until either the game
/// reports a result or the move generator yields no moves, then ends the turn
/// on the copy.
/// </summary>
/// <returns>The simulated copy of the playfield after <c>endTurn</c> was applied.</returns>
public Playfield rollOut()
{
    // Lethal-only move generation is switched off for rollouts.
    bool lethalCheck = false;

    // Work on a copy so the playfield held in `state` is left untouched.
    Playfield startState = new Playfield(state);

    // The loop treats a game result of -1 as "no result yet".
    while (startState.getGameResult() == -1)
    {
        // Fills startState.moveList with the moves available on this playfield.
        Movegenerator.Instance.getMoveListForPlayfield(startState, false, lethalCheck);

        if (startState.moveList.Count == 0)
        {
            // Nothing left to play: stop the rollout here.
            break;
        }

        Action move = startState.moveList[GameManager.getRNG().Next(startState.moveList.Count)];
        startState.doAction(move);
    }

    //TODO: last turn rollout

    startState.endTurn(false, false);
    return startState;
}
/// <summary>
/// Runs one random playout starting from a copy of <paramref name="p"/>'s state.
/// </summary>
/// <remarks>
/// Each time the move list comes back empty the turn is ended. While
/// <c>isEndReached</c> is still false, a turn counter seeded from
/// <c>p.depth</c> is incremented, and when it equals <c>rolloutDepth</c> the
/// playout stops and the board value is returned (also recording the best
/// board/value seen so far). Otherwise the playout continues until the game
/// result is no longer -1.
/// </remarks>
/// <param name="p">Node whose <c>state</c> and <c>depth</c> seed the playout.</param>
/// <returns>
/// The board value when the playout is cut off at <c>rolloutDepth</c>;
/// otherwise 1 if the final game result equals <c>playerSide</c>, else 0.
/// </returns>
public float sample(Node p)
{
    Playfield board = new Playfield(p.state);
    int turnsPlayed = p.depth;

    int result;
    // The game result is re-read after every iteration, including the ones
    // that leave early through `continue`.
    for (result = board.getGameResult(); result == -1; result = board.getGameResult())
    {
        Movegenerator.Instance.getMoveListForPlayfield(board, false);

        if (board.moveList.Count != 0)
        {
            // Play one uniformly random move from the generated list.
            Action picked = board.moveList[GameManager.getRNG().Next(board.moveList.Count)];
            board.doAction(picked);
            continue;
        }

        // No moves available: hand the turn over.
        board.endTurn(false, false);

        if (isEndReached)
        {
            // A game end has already been reached by some playout, so the
            // depth cut-off is no longer applied.
            continue;
        }

        turnsPlayed++;
        if (turnsPlayed != rolloutDepth)
        {
            continue;
        }

        // Depth limit hit: score the board instead of playing to the end.
        float value = board.getBoardValue();
        if (value > bestValue)
        {
            bestBoard = new Playfield(board);
            bestValue = value;
        }
        return value;
    }

    // The game produced a result inside this playout.
    isEndReached = true;
    return playerSide == result ? 1 : 0;
}
/// <summary>
/// Chooses the next action epsilon-greedily over the after-states of
/// <c>lastState</c> and applies one temporal-difference update to
/// <c>weights</c>.
/// </summary>
/// <remarks>
/// With probability <c>EPSILON</c> a random move is taken; otherwise the move
/// whose after-state has the highest <c>Q</c> value is taken. In both cases
/// the chosen after-state becomes the new <c>lastState</c> and its Q value
/// becomes <c>qLast</c>.
/// </remarks>
/// <returns>
/// The chosen action, or <c>null</c> when there are no moves (or no candidate
/// produced a usable Q value).
/// </returns>
public override Action getMove()
{
    List<Action> moves = Movegenerator.Instance.getMoveList(lastState, false, true, true);
    if (moves.Count == 0)
    {
        return null;
    }

    float maxQValue = Single.MinValue;
    Action bestAction = null;
    Playfield bestState = null;

    //epsilon greedy
    if (GameManager.getRNG().NextDouble() < EPSILON)
    {
        // Exploration: evaluate the after-state of the randomly chosen move as
        // well, so that the TD target, lastState and qLast below are based on
        // the move actually taken. Leaving maxQValue at Single.MinValue here
        // would swamp the weight update and leave lastState null.
        bestAction = moves[GameManager.getRNG().Next(moves.Count)];
        bestState = new Playfield(lastState);
        bestState.doAction(bestAction);
        maxQValue = Q(bestState);
    }
    else
    {
        // Exploitation: keep the move whose after-state scores highest.
        foreach (Action action in moves)
        {
            Playfield afterState = new Playfield(lastState);
            afterState.doAction(action);

            float qValue = Q(afterState);
            if (qValue > maxQValue)
            {
                maxQValue = qValue;
                bestAction = action;
                bestState = afterState;
            }
        }
    }

    if (bestState == null)
    {
        // No candidate compared greater than Single.MinValue (e.g. Q returned
        // NaN). Skip the update rather than train on a meaningless target or
        // overwrite lastState with null.
        return bestAction;
    }

    // Reward 1 only when the chosen after-state is a win for this player:
    // game result 0 when playerSide is true, 1 when it is false.
    int result = bestState.getGameResult();
    bool won = playerSide ? result == 0 : result == 1;
    if (won)
    {
        reward = 1;
        Helpfunctions.Instance.logg("win reward received");
    }
    else
    {
        reward = 0;
    }

    //update weights
    float difference = reward + DISCOUNT_FACTOR * maxQValue - qLast;
    List<float> features = getFeatures(lastState);

    // Open questions: self play? subtract? verify correctness (print every
    // update step)? tile coding? binary features?
    lastState.debugMinions();

    // Gradient step on each weight of the linear approximator:
    // w_j += LEARNING_RATE * difference * feature_j.
    for (int j = 0; j < NUM_FEATURES; j++)
    {
        weights[j] += LEARNING_RATE * difference * features[j];
    }
    normalizeWeights();

    lastState = bestState;
    qLast = maxQValue;
    return bestAction;
}