Beispiel #1
0
    /// <summary>
    /// Plays uniformly random moves on a new <c>Playfield</c> constructed from <c>state</c>
    /// until the game reports a result or the move generator yields no moves, then ends
    /// the turn on that playfield and returns it.
    /// </summary>
    /// <returns>
    /// The playfield built from <c>state</c> after the random playout and the final
    /// <c>endTurn(false, false)</c> call.
    /// </returns>
    public Playfield rollOut()
    {
        // Lethal-check move generation is switched off for rollouts.
        const bool lethalCheck = false;

        // All moves are applied to this separate instance, never to `state` directly.
        // NOTE(review): assumes the Playfield(Playfield) constructor copies deeply - confirm.
        Playfield startState = new Playfield(state);

        // The loop runs while getGameResult() is -1 (presumably "no winner yet" - confirm).
        while (startState.getGameResult() == -1)
        {
            // Fills startState.moveList with the moves available on the current board.
            Movegenerator.Instance.getMoveListForPlayfield(startState, false, lethalCheck);

            if (startState.moveList.Count == 0)
            {
                // Nothing left to play: stop the playout here.
                break;
            }

            Action move = startState.moveList[GameManager.getRNG().Next(startState.moveList.Count)];
            startState.doAction(move);
        }

        //TODO: last turn rollout

        startState.endTurn(false, false);
        return startState;
    }
Beispiel #2
0
        /// <summary>
        /// Runs one random playout from a new <c>Playfield</c> constructed from
        /// <paramref name="p"/>'s state and returns a value for it.
        /// </summary>
        /// <remarks>
        /// While <c>isEndReached</c> is false the playout is cut off once the turn counter
        /// (starting at <c>p.depth</c>, incremented each time a turn is ended) equals
        /// <c>rolloutDepth</c>; the board value at that point is returned and, if it beats
        /// <c>bestValue</c>, a copy of the board is remembered in <c>bestBoard</c>.
        /// Once any playout has run to a game result, <c>isEndReached</c> is set and later
        /// playouts are no longer cut off by depth.
        /// </remarks>
        /// <param name="p">Node whose <c>state</c> and <c>depth</c> seed the playout.</param>
        /// <returns>
        /// The board value at the depth cut-off, or 1 when the final game result equals
        /// <c>playerSide</c> and 0 otherwise.
        /// </returns>
        public float sample(Node p)
        {
            Playfield board = new Playfield(p.state);
            int turnsSeen = p.depth;
            int result;

            // The for-iterator re-queries the game result after every loop pass,
            // including the passes that leave early through `continue`.
            for (result = board.getGameResult(); result == -1; result = board.getGameResult())
            {
                Movegenerator.Instance.getMoveListForPlayfield(board, false);

                if (board.moveList.Count != 0)
                {
                    // Moves are available: play one of them at random.
                    Action chosen = board.moveList[GameManager.getRNG().Next(board.moveList.Count)];
                    board.doAction(chosen);
                    continue;
                }

                // No moves left for the side to act: hand the turn over.
                board.endTurn(false, false);

                if (isEndReached)
                {
                    // Depth cut-off is disabled after a playout has reached a game result.
                    continue;
                }

                turnsSeen++;

                // NOTE(review): strict equality - a playout whose starting depth is already
                // >= rolloutDepth is never cut off here; confirm this is intended.
                if (turnsSeen != rolloutDepth)
                {
                    continue;
                }

                float boardValue = board.getBoardValue();
                if (boardValue > bestValue)
                {
                    bestBoard = new Playfield(board);
                    bestValue = boardValue;
                }
                return boardValue;
            }

            isEndReached = true;
            return (playerSide == result) ? 1 : 0;
        }
Beispiel #3
0
    /// <summary>
    /// Chooses the next action epsilon-greedily over after-state Q-values and performs one
    /// temporal-difference update of the linear weights for <c>lastState</c>.
    /// </summary>
    /// <remarks>
    /// With probability <c>EPSILON</c> a random legal move is taken; otherwise the move whose
    /// after-state has the largest <c>Q</c> value is taken. In both cases the after-state of
    /// the move that is actually played supplies the bootstrap value and the reward, so the
    /// update never sees the <c>Single.MinValue</c> sentinel and <c>lastState</c> is never
    /// replaced by <c>null</c>.
    /// </remarks>
    /// <returns>The selected action, or <c>null</c> when no move is available.</returns>
    public override Action getMove()
    {
        List<Action> moves = Movegenerator.Instance.getMoveList(lastState, false, true, true);
        if (moves.Count == 0)
        {
            // Nothing to play; weights and lastState are left untouched.
            return null;
        }

        float maxQValue = Single.MinValue;
        Action bestAction = null;
        Playfield bestState = null;

        //epsilon greedy
        if (GameManager.getRNG().NextDouble() < EPSILON)
        {
            // Exploration: play a random move, but still evaluate its after-state so the
            // update below has a real bootstrap value and a valid successor state.
            bestAction = moves[GameManager.getRNG().Next(moves.Count)];
            bestState = new Playfield(lastState);
            bestState.doAction(bestAction);
            maxQValue = Q(bestState);
        }
        else
        {
            foreach (Action action in moves)
            {
                Playfield afterState = new Playfield(lastState);
                afterState.doAction(action);
                float QValue = Q(afterState);

                // The null check guarantees a choice even if every Q value is NaN or
                // equal to the initial sentinel.
                if (bestAction == null || QValue > maxQValue)
                {
                    maxQValue = QValue;
                    bestAction = action;
                    bestState = afterState;
                }
            }
        }

        // Reward comes from the after-state that will actually be reached: 1 when the game
        // result matches this player's winning code (0 if playerSide is true, 1 otherwise),
        // else 0.
        int gameResult = bestState.getGameResult();
        if ((playerSide && gameResult == 0) || (!playerSide && gameResult == 1))
        {
            reward = 1; //is it good?
            Helpfunctions.Instance.logg("win reward received");
        }
        else
        {
            reward = 0;
        }

        //update weights
        float difference = reward + DISCOUNT_FACTOR * maxQValue - qLast;
        List<float> features = getFeatures(lastState);
        // Open questions from the original author: self play? subtract? verify correctness
        // (print every update step)? tile coding? binary features?
        lastState.debugMinions();
        for (int j = 0; j < NUM_FEATURES; j++)
        {
            // Gradient step for a linear approximator: w_j += alpha * delta * phi_j(lastState).
            weights[j] = weights[j] + LEARNING_RATE * difference * features[j];
        }
        normalizeWeights();

        // The chosen after-state becomes the reference state for the next update.
        lastState = bestState;
        qLast = maxQValue;

        return bestAction;
    }