示例#1
0
        /// <summary>
        /// Performs one epsilon-greedy learning step: enumerates the states reachable from
        /// <c>lastState</c>, picks one (uniformly at random with probability <c>EPSILON</c>,
        /// otherwise the one with the highest <c>Q</c> value), updates <c>weights</c> from the
        /// resulting temporal-difference error, and makes the chosen state the new
        /// <c>lastState</c>.
        /// </summary>
        /// <returns>
        /// The chosen successor state, or the unchanged <c>lastState</c> when no successor
        /// state was generated (in that case no weight update is performed).
        /// </returns>
        public Playfield QStep()
        {
            // NOTE(review): a forced full collection on every step is expensive; kept because it
            // may be a deliberate workaround for memory pressure from state generation -- confirm
            // and remove if it is not needed.
            GC.Collect();
            float     maxQValue = Single.MinValue;
            Playfield bestState = lastState;

            // Start each step with a fresh transposition table seeded with a copy of the current
            // state (presumably used by getAllpossibleStates to skip duplicates -- verify).
            tt = new TranspositionTable();
            lastState.debugMinions();
            tt.addToMap(new Playfield(lastState));

            List <Playfield> moves = new List <Playfield>();
            getAllpossibleStates(lastState, ref moves);

            Helpfunctions.Instance.logg("movesize = " + moves.Count);

            // Debug dump of every candidate state.
            foreach (Playfield p in moves)
            {
                Helpfunctions.Instance.logg("===============P:hashkey = " + tt.getHashkey(p));
                p.printActions();
                p.printBoard();
            }

            // Nothing to choose from: keep the current state and skip the learning update.
            if (moves.Count == 0)
            {
                return(bestState);
            }

            // Epsilon-greedy selection.
            if (GameManager.getRNG().NextDouble() < EPSILON)
            {
                // Explore: pick a random candidate. Its Q value must still be evaluated;
                // otherwise maxQValue stays at Single.MinValue, which would blow up the
                // temporal-difference error below and be stored in qLast for the next step.
                bestState = moves[GameManager.getRNG().Next(moves.Count)];
                maxQValue = Q(bestState);
            }
            else
            {
                // Exploit: pick the candidate with the highest Q value.
                foreach (Playfield posState in moves)
                {
                    float QValue = Q(posState);
                    if (QValue > maxQValue)
                    {
                        maxQValue = QValue;
                        bestState = posState;
                    }
                }
            }

            // Reward for the state that was actually chosen (explored or greedy): 1 when its
            // game result matches this agent's side (0 when playerSide is true, 1 otherwise),
            // else 0. Computed once after selection so an exploration step never reuses a stale
            // reward from an earlier step.
            if (bestState.getGameResult() == (playerSide ? 0 : 1))
            {
                reward = 1; // TODO: is a flat win reward good? (the turn counter was considered)
                Helpfunctions.Instance.logg("win reward received");
            }
            else
            {
                reward = 0;
            }

            // Update weights: w[j] += LEARNING_RATE * tdError * feature[j], where the features
            // are those of the state we are leaving.
            // TODO (from original notes): self play? subtract? verify correctness by printing
            // the update at every step; tile coding? binary features?
            float difference = reward + DISCOUNT_FACTOR * maxQValue - qLast;
            List <float> features = getFeatures(lastState);

            for (int j = 0; j < NUM_FEATURES; j++)
            {
                weights[j] = weights[j] + LEARNING_RATE * difference * features[j];
            }
            normalizeWeights();

            // Advance: the chosen state becomes the current one, and its Q value is remembered
            // for the next step's temporal-difference error.
            lastState = bestState;
            qLast     = maxQValue;

            Helpfunctions.Instance.logg("best:");
            bestState.printActions();

            return(bestState);
        }