/// <summary>
/// Recursively walks every action sequence available from <paramref name="state"/>
/// and collects the states in which no further move can be generated.
/// </summary>
/// <param name="state">Position to expand; it is copied before an action is applied.</param>
/// <param name="statesList">Receives each move-less state that <c>tt.addToMap</c> reports as <c>false</c>.</param>
/// <remarks>
/// Only move-less states are passed through the transposition table <c>tt</c>;
/// intermediate states are expanded without any duplicate check.
/// Presumably <c>addToMap</c> returns <c>true</c> when an equivalent state is
/// already stored - TODO confirm against TranspositionTable.
/// </remarks>
public void getAllpossibleStates(Playfield state, ref List<Playfield> statesList)
{
    List<Action> candidates = Movegenerator.Instance.getMoveList(state, false, true, true);

    if (candidates.Count != 0)
    {
        // Branch on every candidate action, each applied to its own copy of the position.
        foreach (Action candidate in candidates)
        {
            Playfield successor = new Playfield(state);
            successor.doAction(candidate);
            getAllpossibleStates(successor, ref statesList);
        }
        return;
    }

    // Nothing left to play from this position: record it once.
    bool reportedByTable = tt.addToMap(state);
    if (!reportedByTable)
    {
        statesList.Add(state);
    }
}
// Legacy implementation of expand(Node), kept for reference (disabled).
//public int expand(Node p)
//{
//    GameManager.Instance.moveCount++;
//    if (GameManager.Instance.moveCount == 225)
//    {
//        int debug = 1;
//    }
//    int state = 0;
//    Playfield afterState = new Playfield(p.state);
//    tt = new TranspositionTable();
//    endTurnTt = new HandHashTable();
//    List<Playfield> moves = new List<Playfield>();
//    Movegenerator.Instance.getMoveListForPlayfield(afterState, null, false);
//    int counter = GameManager.Instance.moveCount;
//    getAllpossibleStates(afterState, ref moves);
//    if (GameManager.Instance.moveCount - counter > 1000)
//    {
//        Helpfunctions.Instance.logg("MOVES = " + (GameManager.Instance.moveCount - counter));
//        int debug = 1;
//    }
//    foreach (Playfield pf in moves)
//    {
//        if (pf.moveList.Count != 0)
//        {
//            int debug = 1;
//        }
//    }
//    Helpfunctions.Instance.logg("try size = " + endTurnTt.TTable.Count);
//    //while (endTurnTt.TTable.Count == 1)
//    //{ // no moves available, change side
//    //    tt.clearTable();
//    //    endTurnTt.clearTable();
//    //    afterState.endTurn(false, false);
//    //    Movegenerator.Instance.getMoveListForPlayfield(afterState, null, false);
//    //    getAllpossibleStates(afterState, ref moves);
//    //    state = 1;
//    //}
//    if (endTurnTt.TTable.Count == 1)
//        state = 1;
//    //afterState.endTurn(false, false);
//    //Node originalNode = new Node(new Playfield(afterState), null, p.depth + 1);
//    //p.children.Add(originalNode);
//    foreach (KeyValuePair<float, List<Playfield>> move in endTurnTt.TTable)
//    {
//        Node afterNode = new Node(new Playfield(move.Value[0]), null, p.depth + 1);
//        //afterNode.state.doAction(move);
//        p.children.Add(afterNode);
//    }
//    Helpfunctions.Instance.logg("expand size = " + endTurnTt.TTable.Count);
//    //if (moves.Count > 100)
//    //{
//    //    foreach (Playfield pf in moves)
//    //    {
//    //        Helpfunctions.Instance.logg("key:" + endTurnTt.getHashkey(pf));
//    //        //pf.debugHand();
//    //    }
//    //}
//    return state;
//}

/// <summary>
/// Recursively plays out every action in <c>state.moveList</c> and registers each
/// resulting end-of-turn position in <c>endTurnTt</c>.
/// </summary>
/// <param name="state">
/// Position to expand. Its <c>moveList</c> must already be populated; this method
/// only regenerates the list for the successor states it creates.
/// </param>
/// <param name="statesList">
/// Forwarded unchanged to recursive calls. This implementation never appends to it;
/// results are stored in <c>endTurnTt</c> instead.
/// </param>
/// <remarks>
/// Increments <c>GameManager.Instance.moveCount</c> once per visited state.
/// A successor is expanded further only when <c>tt.addToMap</c> returns <c>false</c>
/// (presumably meaning "not seen before" - TODO confirm); otherwise "pruned" is logged.
/// </remarks>
public void getAllpossibleStates(Playfield state, ref List<Playfield> statesList)
{
    GameManager.Instance.moveCount++;

    if (state.moveList.Count == 0)
    {
        // No action left: end the turn on a copy and record the resulting position.
        Playfield afterState = new Playfield(state);
        afterState.endTurn(false, false);

        // Called for its side effect of storing the state; the return value is
        // deliberately ignored (the original wrapped it in an empty if-body).
        endTurnTt.addToMap(afterState);
        return;
    }

    foreach (Action action in state.moveList)
    {
        Playfield afterState = new Playfield(state);
        afterState.doAction(action);
        Movegenerator.Instance.getMoveListForPlayfield(afterState, false);

        if (tt.addToMap(afterState) == false)
        {
            getAllpossibleStates(afterState, ref statesList);
        }
        else
        {
            Helpfunctions.Instance.logg("pruned");
        }
    }
}
/// <summary>
/// Runs one random playout starting from a copy of <c>p.state</c>.
/// </summary>
/// <param name="p">Node whose state seeds the playout and whose <c>depth</c> seeds the turn counter.</param>
/// <returns>
/// The value of <c>getBoardValue()</c> when the turn counter reaches <c>rolloutDepth</c>
/// (only while <c>isEndReached</c> is still false); otherwise 1 when the final game
/// result equals <c>playerSide</c>, else 0.
/// </returns>
/// <remarks>
/// Side effects: updates <c>bestBoard</c>/<c>bestValue</c> when a depth-limited
/// evaluation beats the current best, and sets <c>isEndReached</c> once a playout
/// runs until <c>getGameResult()</c> stops returning -1.
/// NOTE(review): the depth cutoff uses equality, so it never triggers when
/// <c>p.depth</c> is already greater than or equal to <c>rolloutDepth</c>.
/// </remarks>
public float sample(Node p)
{
    Playfield sim = new Playfield(p.state);
    int turnsPlayed = p.depth;

    int outcome = sim.getGameResult();
    while (outcome == -1)
    {
        Movegenerator.Instance.getMoveListForPlayfield(sim, false);
        int available = sim.moveList.Count;

        if (available != 0)
        {
            // Play a uniformly random action from the generated list.
            Action picked = sim.moveList[GameManager.getRNG().Next(available)];
            sim.doAction(picked);
        }
        else
        {
            // No action available: hand the turn over.
            sim.endTurn(false, false);

            if (!isEndReached)
            {
                turnsPlayed++;
                if (turnsPlayed == rolloutDepth)
                {
                    // Depth limit hit: score the board instead of playing to the end.
                    float boardValue = sim.getBoardValue();
                    if (boardValue > bestValue)
                    {
                        bestBoard = new Playfield(sim);
                        bestValue = boardValue;
                    }
                    return boardValue;
                }
            }
        }

        outcome = sim.getGameResult();
    }

    isEndReached = true;
    return playerSide == outcome ? 1f : 0f;
}
/// <summary>
/// Plays random actions on a copy of this node's <c>state</c> until the game result
/// is decided or no action can be generated, then ends the turn on that copy.
/// </summary>
/// <returns>The copied playfield after the random actions and the final <c>endTurn</c>.</returns>
/// <remarks>
/// <c>endTurn(false, false)</c> is always called before returning, including when the
/// loop stopped because <c>getGameResult()</c> no longer returned -1.
/// The debug-only locals of the previous version (unused turn counter, max-mana
/// snapshot feeding an empty breakpoint stub) were removed; they had no effect.
/// </remarks>
public Playfield rollOut()
{
    // Lethal-check move generation is disabled for roll-outs.
    const bool lethalCheck = false;

    Playfield startState = new Playfield(state);

    while (startState.getGameResult() == -1)
    {
        Movegenerator.Instance.getMoveListForPlayfield(startState, false, lethalCheck);

        if (startState.moveList.Count == 0)
        {
            break;
        }

        Action move = startState.moveList[GameManager.getRNG().Next(startState.moveList.Count)];
        startState.doAction(move);
    }

    // TODO: last turn rollout

    startState.endTurn(false, false);
    return startState;
}
/// <summary>
/// Expands this node: generates the actions available from a copy of <c>state</c>
/// and creates one child node per action.
/// </summary>
/// <remarks>
/// Always sets <c>isExpanded</c>. When no action is available the node is marked
/// <c>isTerminal</c> and <c>children</c> is left untouched. For the root
/// (<c>parent == null</c>) <c>initValueRoute</c> is called with the number of actions.
/// </remarks>
public void expand()
{
    // Lethal-check move generation is disabled for expansion.
    bool lethalCheck = false;

    Playfield origin = new Playfield(state);
    Movegenerator.Instance.getMoveListForPlayfield(origin, false, lethalCheck);
    isExpanded = true;

    int actionCount = origin.moveList.Count;
    if (actionCount <= 0)
    {
        this.isTerminal = true;
        return;
    }

    children = new ParetoTreeNode[actionCount];
    for (int slot = 0; slot < actionCount; slot++)
    {
        // Each child gets its own copy of the position with one action applied.
        Playfield successor = new Playfield(origin);
        successor.doAction(origin.moveList[slot]);

        children[slot] = new ParetoTreeNode(successor, this, slot, this.roller, this.mTreePolicy, this.m_rnd, this.m_player);
    }

    // This is only for the root.
    if (parent == null)
    {
        this.initValueRoute(actionCount);
    }
}
/// <summary>
/// Runs <c>maxEpisodes</c> training episodes, selecting actions epsilon-greedily on
/// <c>Q</c> and updating <c>weights</c> after every action.
/// </summary>
/// <remarks>
/// Fixes over the previous version:
/// <list type="bullet">
/// <item>The exploration branch now applies the randomly chosen action to a copy of
/// the state. Before, the successor stayed null, <c>lastState</c> became null and the
/// next <c>getGameResult()</c> call threw.</item>
/// <item>The greedy branch always selects a successor, even when every Q value is
/// less than or equal to zero or is NaN. Before, the running maximum started at 0,
/// so such cases selected nothing and also nulled <c>lastState</c>.</item>
/// </list>
/// NOTE(review): the per-episode reset (<c>lastState = new Playfield(initState)</c>) is
/// disabled in the original source, so each episode continues from whatever
/// <c>lastState</c> the previous one left behind - confirm this is intended.
/// </remarks>
public void QLearning()
{
    for (int episode = 0; episode < maxEpisodes; episode++)
    {
        //lastState = new Playfield(initState);
        lastState.drawInitCards();
        qLast = Q(lastState);

        int score = lastState.getGameResult();
        while (score == -1)
        {
            List<Action> moves = Movegenerator.Instance.getMoveList(lastState, false, true, true);

            if (moves.Count == 0)
            {
                lastState.endTurn(false, false);
            }
            else
            {
                float maxQValue = float.MinValue;
                Playfield bestState = null;

                // epsilon greedy
                if (GameManager.getRNG().NextDouble() < EPSILON)
                {
                    // Explore: simulate the random action so that a successor state
                    // and its Q value exist for the update below.
                    Action explored = moves[GameManager.getRNG().Next(moves.Count)];
                    bestState = new Playfield(lastState);
                    bestState.doAction(explored);
                    maxQValue = Q(bestState);
                }
                else
                {
                    foreach (Action action in moves)
                    {
                        Playfield afterState = new Playfield(lastState);
                        afterState.doAction(action);
                        float QValue = Q(afterState);

                        // The first candidate is always accepted so that a successor
                        // exists even if every comparison fails (e.g. NaN values).
                        if (bestState == null || QValue > maxQValue)
                        {
                            maxQValue = QValue;
                            bestState = afterState;
                        }
                    }
                }

                // update weights
                float difference = reward + DISCOUNT_FACTOR * maxQValue - qLast;
                List<float> features = getFeatures(lastState);
                for (int j = 0; j < NUM_FEATURES; j++)
                {
                    weights[j] = weights[j] + LEARNING_RATE * difference * features[j];
                }
                normalizeWeights();

                lastState = bestState;
                qLast = maxQValue;
            }

            score = lastState.getGameResult();
        }
    }
}
/// <summary>
/// Chooses the next action epsilon-greedily on <c>Q</c>, updates <c>weights</c> from the
/// resulting temporal-difference error and advances <c>lastState</c> to the chosen
/// successor.
/// </summary>
/// <returns>
/// The chosen action, or <c>null</c> when no move is available. In that case nothing is
/// updated and <c>lastState</c> is left unchanged.
/// </returns>
/// <remarks>
/// Fixes over the previous version:
/// <list type="bullet">
/// <item>The exploration branch now simulates the random action. Before, the successor
/// stayed null and the Q value stayed at <c>Single.MinValue</c>, which fed a huge
/// negative error into the weight update and set <c>lastState</c> to null.</item>
/// <item><c>reward</c> is derived once from the finally selected successor in both
/// branches. Before, it kept its old value on exploratory moves.</item>
/// <item>"win reward received" is logged at most once per call instead of once per
/// improved candidate during the greedy scan.</item>
/// </list>
/// </remarks>
public override Action getMove()
{
    List<Action> moves = Movegenerator.Instance.getMoveList(lastState, false, true, true);
    if (moves.Count == 0)
    {
        return null;
    }

    float maxQValue = float.MinValue;
    Action bestAction = null;
    Playfield bestState = null;

    // epsilon greedy
    if (GameManager.getRNG().NextDouble() < EPSILON)
    {
        // Explore: apply the random action to a copy so a successor and its Q value exist.
        bestAction = moves[GameManager.getRNG().Next(moves.Count)];
        bestState = new Playfield(lastState);
        bestState.doAction(bestAction);
        maxQValue = Q(bestState);
    }
    else
    {
        foreach (Action action in moves)
        {
            Playfield afterState = new Playfield(lastState);
            afterState.doAction(action);
            float QValue = Q(afterState);

            // The first candidate is always accepted so a selection exists even if
            // every comparison fails (e.g. NaN values).
            if (bestState == null || QValue > maxQValue)
            {
                maxQValue = QValue;
                bestAction = action;
                bestState = afterState;
            }
        }
    }

    // Reward 1 only when the selected successor's game result matches the
    // playerSide/result pairing below; otherwise 0.
    int result = bestState.getGameResult();
    if ((playerSide && result == 0) || (!playerSide && result == 1))
    {
        reward = 1; // is it good?
        Helpfunctions.Instance.logg("win reward received");
    }
    else
    {
        reward = 0;
    }

    // update weights
    float difference = reward + DISCOUNT_FACTOR * maxQValue - qLast;
    List<float> features = getFeatures(lastState);

    // Open questions: self play? subtract? verify correctness (print the update
    // at every step)? tile coding? binary features?
    lastState.debugMinions();

    for (int j = 0; j < NUM_FEATURES; j++)
    {
        weights[j] = weights[j] + LEARNING_RATE * difference * features[j];
    }
    normalizeWeights();

    lastState = bestState;
    qLast = maxQValue;
    return bestAction;
}