/// <summary>
/// Recursively plays out every action sequence available from <paramref name="state"/> and
/// collects the states at which the move generator reports no further actions.
/// </summary>
/// <param name="state">
/// Position to expand. A new <c>Playfield</c> is constructed from it before each action is
/// applied, so <c>doAction</c> is never called on this instance.
/// </param>
/// <param name="statesList">
/// Receives every action-less state for which <c>tt.addToMap</c> returns <c>false</c>
/// (presumably "not seen before" - confirm against the transposition table implementation).
/// </param>
public void getAllpossibleStates(Playfield state, ref List<Playfield> statesList)
{
    List<Action> legalActions = Movegenerator.Instance.getMoveList(state, false, true, true, 0.0);

    if (legalActions.Count != 0)
    {
        // Interior node: branch on every action. Intermediate states are not registered in
        // the transposition table here; only leaves are (see below).
        foreach (Action step in legalActions)
        {
            Playfield successor = new Playfield(state);
            successor.doAction(step);
            getAllpossibleStates(successor, ref statesList);
        }

        return;
    }

    // Leaf: nothing left to play from this state. Record it unless the table rejects it.
    if (!tt.addToMap(state))
    {
        statesList.Add(state);
    }
}
/// <summary>
/// Performs one epsilon-greedy learning step starting from <c>lastState</c>.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Resets the transposition table <c>tt</c> and enumerates candidate states with
///    <c>getAllpossibleStates</c>, logging each candidate.
/// 2. If there are no candidates, returns <c>lastState</c> without touching the weights.
/// 3. With probability <c>EPSILON</c> picks a random candidate; otherwise picks the candidate
///    with the highest <c>Q</c> value.
/// 4. Sets <c>reward</c> to 1 when the selected state's <c>getGameResult()</c> is 0 for
///    <c>playerSide == true</c> or 1 for <c>playerSide == false</c>, else to 0.
/// 5. Updates every weight by
///    <c>LEARNING_RATE * (reward + DISCOUNT_FACTOR * Q(selected) - qLast) * feature</c>,
///    using the features of the previous <c>lastState</c>, then calls <c>normalizeWeights</c>.
/// 6. Stores the selected state and its value in <c>lastState</c> / <c>qLast</c>.
/// </remarks>
/// <returns>The selected successor state, or the unchanged <c>lastState</c> if no candidate exists.</returns>
public Playfield QStep()
{
    // NOTE(review): forcing a full collection on every step is normally discouraged. It is
    // kept because removing it would change the memory profile of the search; confirm
    // whether it is still needed.
    GC.Collect();

    // Sentinel for the greedy arg-max below; always overwritten before the weight update
    // as long as Q returns a comparable (non-NaN) value for at least one candidate.
    float selectedQ = Single.MinValue;
    Playfield bestState = lastState;

    // Fresh transposition table for this step, seeded with a copy of the current state.
    tt = new TranspositionTable();
    lastState.debugMinions();
    tt.addToMap(new Playfield(lastState));

    List<Playfield> moves = new List<Playfield>();
    getAllpossibleStates(lastState, ref moves);

    Helpfunctions.Instance.logg("movesize = " + moves.Count);
    foreach (Playfield p in moves)
    {
        Helpfunctions.Instance.logg("===============P:hashkey = " + tt.getHashkey(p));
        p.printActions();
        p.printBoard();
    }

    if (moves.Count == 0)
    {
        // Nothing to choose from: keep the current state and skip the learning update.
        return bestState;
    }

    if (GameManager.getRNG().NextDouble() < EPSILON)
    {
        // Exploration: pick a random candidate. Its value must still be evaluated,
        // otherwise the Single.MinValue sentinel would leak into the TD error and qLast.
        bestState = moves[GameManager.getRNG().Next(moves.Count)];
        selectedQ = Q(bestState);
    }
    else
    {
        // Exploitation: arg-max of Q over all candidates.
        foreach (Playfield posState in moves)
        {
            float candidateQ = Q(posState);
            if (candidateQ > selectedQ)
            {
                selectedQ = candidateQ;
                bestState = posState;
            }
        }
    }

    // Reward depends only on the state that was finally selected, so it is computed once
    // here for both branches (the exploration branch previously left it stale).
    int gameResult = bestState.getGameResult();
    if ((playerSide && gameResult == 0) || (!playerSide && gameResult == 1))
    {
        reward = 1; // is it good?
        Helpfunctions.Instance.logg("win reward received");
    }
    else
    {
        reward = 0;
    }

    // Update weights with the temporal-difference error of the previous step.
    float difference = reward + DISCOUNT_FACTOR * selectedQ - qLast;
    List<float> features = getFeatures(lastState);

    // Open questions carried over from the original notes (translated): self play?
    // subtract? verify correctness by printing the update at every step; tile coding?
    // binary features?
    for (int j = 0; j < NUM_FEATURES; j++)
    {
        weights[j] = weights[j] + LEARNING_RATE * difference * features[j];
    }

    normalizeWeights();

    lastState = bestState;
    qLast = selectedQ;

    Helpfunctions.Instance.logg("best:");
    bestState.printActions();
    return bestState;
}
// Disabled (commented-out) expand() implementation, kept as found.
//public int expand(Node p)
//{
//    GameManager.Instance.moveCount++;
//    if (GameManager.Instance.moveCount == 225)
//    {
//        int debug = 1;
//    }
//    int state = 0;
//    Playfield afterState = new Playfield(p.state);
//    tt = new TranspositionTable();
//    endTurnTt = new HandHashTable();
//    List<Playfield> moves = new List<Playfield>();
//    Movegenerator.Instance.getMoveListForPlayfield(afterState, null, false);
//    int counter = GameManager.Instance.moveCount;
//    getAllpossibleStates(afterState, ref moves);
//    if (GameManager.Instance.moveCount - counter > 1000)
//    {
//        Helpfunctions.Instance.logg("MOVES = " + (GameManager.Instance.moveCount - counter));
//        int debug = 1;
//    }
//    foreach (Playfield pf in moves)
//    {
//        if (pf.moveList.Count != 0)
//        {
//            int debug = 1;
//        }
//    }
//    Helpfunctions.Instance.logg("try size = " + endTurnTt.TTable.Count);
//    //while (endTurnTt.TTable.Count == 1)
//    //{ // no moves available, change side
//    //    tt.clearTable();
//    //    endTurnTt.clearTable();
//    //    afterState.endTurn(false, false);
//    //    Movegenerator.Instance.getMoveListForPlayfield(afterState, null, false);
//    //    getAllpossibleStates(afterState, ref moves);
//    //    state = 1;
//    //}
//    if (endTurnTt.TTable.Count == 1)
//        state = 1;
//    //afterState.endTurn(false, false);
//    //Node originalNode = new Node(new Playfield(afterState), null, p.depth + 1);
//    //p.children.Add(originalNode);
//    foreach (KeyValuePair<float, List<Playfield>> move in endTurnTt.TTable)
//    {
//        Node afterNode = new Node(new Playfield(move.Value[0]), null, p.depth + 1);
//        //afterNode.state.doAction(move);
//        p.children.Add(afterNode);
//    }
//    Helpfunctions.Instance.logg("expand size = " + endTurnTt.TTable.Count);
//    //if (moves.Count > 100)
//    //{
//    //    foreach (Playfield pf in moves)
//    //    {
//    //        Helpfunctions.Instance.logg("key:" + endTurnTt.getHashkey(pf));
//    //        //pf.debugHand();
//    //    }
//    //}
//    return state;
//}

/// <summary>
/// Recursively applies every action in <c>state.moveList</c>, and registers each state that
/// has no actions left in <c>endTurnTt</c> after ending its turn.
/// </summary>
/// <param name="state">
/// Position to expand; its <c>moveList</c> must already be populated. A new <c>Playfield</c>
/// is constructed from it before any action or end-of-turn is applied.
/// </param>
/// <param name="statesList">
/// Passed through to the recursive calls unchanged. This method never adds to it; results
/// are collected in <c>endTurnTt</c> instead.
/// </param>
public void getAllpossibleStates(Playfield state, ref List<Playfield> statesList)
{
    if (state.moveList.Count == 0)
    {
        // Leaf: end the turn on a copy, draw the turn-start card, and register the result.
        Playfield turnEnded = new Playfield(state);
        turnEnded.endTurn(false, false);
        turnEnded.drawTurnStartCard();

        // Only the registration side effect matters here; the return value is not used.
        endTurnTt.addToMap(turnEnded);
        return;
    }

    foreach (Action action in state.moveList)
    {
        Playfield afterState = new Playfield(state);
        afterState.doAction(action);

        // Copy the generated moves into a list owned by afterState.
        // NOTE(review): presumably getMoveList may hand back a shared list - confirm.
        afterState.moveList = new List<Action>(
            Movegenerator.Instance.getMoveList(afterState, false, true, true, 0.0));

        // A true result from tt.addToMap stops expansion of this branch
        // (presumably the state was already registered - confirm).
        if (tt.addToMap(afterState))
        {
            Helpfunctions.Instance.logg("pruned");
            continue;
        }

        getAllpossibleStates(afterState, ref statesList);
    }
}