Beispiel #1
0
            /// <summary>
            /// Appends the objective-function terms for state <paramref name="s"/> to <paramref name="objectSum"/>.
            /// </summary>
            /// <remarks>
            /// Two passes are made over every (current player, player 2) action pair. The first pass adds
            /// decision * current-player Q-value for each pair; the second adds decision * player-2 Q-value.
            /// </remarks>
            /// <param name="s">State whose Q-values are looked up.</param>
            /// <param name="q">Table queried for both players' Q-values.</param>
            /// <param name="actDecisions">Decision per joint action; handed to <c>getActDecision</c> for lookup.</param>
            /// <param name="objectSum">Builder that receives the generated terms.</param>
            private static void ObjFunctermAdd(State s, CorrelatedQTable q, Dictionary <Tuple <Action, Action>, Decision> actDecisions, SumTermBuilder objectSum)
            {
                // Enumerate the action values once and reuse them for all four loops.
                var allActions = Enum.GetValues(typeof(Action));

                // Pass 1: terms weighted by the current player's Q-values.
                foreach (Action ownAction in allActions)
                {
                    foreach (Action otherAction in allActions)
                    {
                        objectSum.Add(getActDecision(ownAction, otherAction, actDecisions) * q.getCurrPQvalue(s, ownAction, otherAction));
                    }
                }

                // Pass 2: terms weighted by player 2's Q-values.
                foreach (Action ownAction in allActions)
                {
                    foreach (Action otherAction in allActions)
                    {
                        objectSum.Add(getActDecision(ownAction, otherAction, actDecisions) * q.getPlayer2Qval(s, ownAction, otherAction));
                    }
                }
            }
Beispiel #2
0
            /// <summary>
            /// Adds one objective term per joint action and per player to <paramref name="objectiveSum"/>.
            /// </summary>
            /// <remarks>
            /// All current-player terms (decision * current-player Q-value) are added first, followed by all
            /// opponent terms (decision * opponent Q-value), each in enum order of the two actions.
            /// </remarks>
            /// <param name="s">State used for the Q-value lookups.</param>
            /// <param name="q">Source of the current player's and the opponent's Q-values.</param>
            /// <param name="actionDecisions">Decision per joint action; handed to <c>GetActionDecision</c> for lookup.</param>
            /// <param name="objectiveSum">Builder the terms are appended to.</param>
            private static void AddObjectiveFunctionTerms(State s, CorrelatedQTable q, Dictionary <Tuple <Action, Action>, Decision> actionDecisions, SumTermBuilder objectiveSum)
            {
                var actionValues = Enum.GetValues(typeof(Action));

                // Current player's contribution.
                foreach (Action mine in actionValues)
                {
                    foreach (Action theirs in actionValues)
                    {
                        var probability = GetActionDecision(mine, theirs, actionDecisions);
                        var ownPayoff   = q.GetCurrentPlayerQValue(s, mine, theirs);
                        var term        = probability * ownPayoff;

                        objectiveSum.Add(term);
                    }
                }

                // Opponent's contribution.
                foreach (Action mine in actionValues)
                {
                    foreach (Action theirs in actionValues)
                    {
                        var probability    = GetActionDecision(mine, theirs, actionDecisions);
                        var opponentPayoff = q.GetOpponentQValue(s, mine, theirs);
                        var term           = probability * opponentPayoff;

                        objectiveSum.Add(term);
                    }
                }
            }
Beispiel #3
0
            /// <summary>
            /// Builds and solves a linear model over joint-action decisions for state <paramref name="s"/>
            /// and returns the resulting value for each player.
            /// </summary>
            /// <remarks>
            /// The model has one non-negative real decision per (current player, player 2) action pair,
            /// a constraint that the decisions sum to 1, the constraints added by <c>rationalconsts</c> and
            /// <c>rationalityConstrPlayer2</c>, and a maximization goal assembled by <c>ObjFunctermAdd</c>.
            /// The shared solver context's model is cleared before the new model is created.
            /// </remarks>
            /// <param name="s">State to evaluate.</param>
            /// <param name="q">Table supplying both players' Q-values.</param>
            /// <returns>
            /// (player 1 value, player 2 value), each the sum over all action pairs of solved decision value
            /// times that player's Q-value; (1.0, 1.0) when the solver does not report an optimal solution.
            /// </returns>
            public static Tuple <double, double> GetValue(State s, CorrelatedQTable q)
            {
                var allActions = Enum.GetValues(typeof(Action));

                var solver = SolverContext.GetContext();
                solver.ClearModel();
                var lp = solver.CreateModel();

                // One decision variable per joint action, named by concatenating both action names.
                var decisionByPair = new Dictionary <Tuple <Action, Action>, Decision>();
                foreach (Action ownAction in allActions)
                {
                    foreach (Action otherAction in allActions)
                    {
                        var label    = ownAction.ToString() + otherAction.ToString();
                        var variable = new Decision(Domain.RealNonnegative, label);

                        lp.AddDecisions(variable);
                        decisionByPair.Add(Tuple.Create(ownAction, otherAction), variable);
                    }
                }

                // All decisions together must sum to exactly 1.
                var probabilityTotal = new SumTermBuilder(25);
                foreach (var entry in decisionByPair)
                {
                    probabilityTotal.Add(entry.Value);
                }
                lp.AddConstraint("probSumConst", probabilityTotal.ToTerm() == 1.0);

                rationalconsts(s, q.getCurrPQvalue, decisionByPair, lp, "A");
                rationalityConstrPlayer2(s, q.getPlayer2Qval, decisionByPair, lp, "B");

                // Objective: the terms from both players' Q-values, maximized.
                var objective = new SumTermBuilder(10);
                ObjFunctermAdd(s, q, decisionByPair, objective);
                lp.AddGoal("MaximizeV", GoalKind.Maximize, objective.ToTerm());

                var outcome = solver.Solve(new SimplexDirective());

                if (outcome.Quality == SolverQuality.Optimal)
                {
                    double valueForPlayer1 = 0.0;
                    double valueForPlayer2 = 0.0;

                    foreach (Action ownAction in allActions)
                    {
                        foreach (Action otherAction in allActions)
                        {
                            var variable  = getActDecision(ownAction, otherAction, decisionByPair);
                            var player1Q  = q.getCurrPQvalue(s, ownAction, otherAction);
                            var solvedVal = variable.ToDouble();
                            valueForPlayer1 = valueForPlayer1 + solvedVal * player1Q;

                            var player2Q = q.getPlayer2Qval(s, ownAction, otherAction);
                            valueForPlayer2 = valueForPlayer2 + solvedVal * player2Q;
                        }
                    }

                    return Tuple.Create(valueForPlayer1, valueForPlayer2);
                }

                // No optimal solution: discard the model and fall back to the fixed default pair.
                solver.ClearModel();
                return Tuple.Create(1.0, 1.0);
            }
Beispiel #4
0
            /// <summary>
            /// Solves a linear model over joint-action decisions for state <paramref name="s"/> and
            /// returns the value obtained by the current player and by the opponent.
            /// </summary>
            /// <remarks>
            /// Steps: clear the shared solver context and create a model; add a non-negative real decision
            /// for every (current, opponent) action pair; constrain the decisions to sum to 1; add the
            /// constraints from <c>SetupRationalityConstraints</c> and <c>SetupRationalityConstraintsOpponent</c>;
            /// maximize the sum produced by <c>AddObjectiveFunctionTerms</c>; solve with a simplex directive.
            /// </remarks>
            /// <param name="s">State to evaluate.</param>
            /// <param name="q">Table supplying the current player's and the opponent's Q-values.</param>
            /// <returns>
            /// (current player value, opponent value), each the sum over all action pairs of solved decision
            /// value times the respective Q-value; (1.0, 1.0) if the solution quality is not optimal.
            /// </returns>
            public static Tuple <double, double> GetValue(State s, CorrelatedQTable q)
            {
                var actionValues = Enum.GetValues(typeof(Action));

                var solverContext = SolverContext.GetContext();
                solverContext.ClearModel();
                var model = solverContext.CreateModel();

                // Register one decision per joint action; its name is both action names concatenated.
                var jointDecisions = new Dictionary <Tuple <Action, Action>, Decision>();
                foreach (Action mine in actionValues)
                {
                    foreach (Action theirs in actionValues)
                    {
                        var jointDecision = new Decision(Domain.RealNonnegative, mine.ToString() + theirs.ToString());
                        model.AddDecisions(jointDecision);
                        jointDecisions.Add(Tuple.Create(mine, theirs), jointDecision);
                    }
                }

                // The decisions are constrained to sum to 1.
                var totalProbability = new SumTermBuilder(25);
                foreach (var pair in jointDecisions)
                {
                    totalProbability.Add(pair.Value);
                }

                var sumsToOne = totalProbability.ToTerm() == 1.0;
                model.AddConstraint("probSumConst", sumsToOne);

                SetupRationalityConstraints(s, q.GetCurrentPlayerQValue, jointDecisions, model, "A");
                SetupRationalityConstraintsOpponent(s, q.GetOpponentQValue, jointDecisions, model, "B");

                // Objective built from both players' Q-table entries.
                var objectiveTerms = new SumTermBuilder(10);
                AddObjectiveFunctionTerms(s, q, jointDecisions, objectiveTerms);
                model.AddGoal("MaximizeV", GoalKind.Maximize, objectiveTerms.ToTerm());

                var solved = solverContext.Solve(new SimplexDirective());

                // For debugging, the solver report is available via solved.GetReport().

                bool isOptimal = solved.Quality == SolverQuality.Optimal;
                if (!isOptimal)
                {
                    // Drop the unusable model and return the fixed fallback pair.
                    solverContext.ClearModel();
                    return Tuple.Create(1.0, 1.0);
                }

                double ownTotal      = 0.0;
                double opponentTotal = 0.0;

                foreach (Action mine in actionValues)
                {
                    foreach (Action theirs in actionValues)
                    {
                        var jointDecision = GetActionDecision(mine, theirs, jointDecisions);
                        var ownQ          = q.GetCurrentPlayerQValue(s, mine, theirs);
                        var weight        = jointDecision.ToDouble();
                        ownTotal += weight * ownQ;

                        var opponentQ = q.GetOpponentQValue(s, mine, theirs);
                        opponentTotal += weight * opponentQ;
                    }
                }

                return Tuple.Create(ownTotal, opponentTotal);
            }
        /// <summary>
        /// Runs the correlated-Q training loop and writes the recorded Q-value changes to <c>output.csv</c>.
        /// </summary>
        /// <remarks>
        /// Every episode starts from the same initial state and ends as soon as either player receives a
        /// non-zero reward. The step counter is only compared with the limit between episodes, so the
        /// final episode always runs to completion. Whenever the update is applied to the initial state
        /// with actions (S, X), the absolute change of the current player's Q-value for that entry is
        /// printed and stored under the current step number.
        /// Each output line is "step,difference", formatted with the invariant culture so the decimal
        /// separator can never be confused with the column separator.
        /// </remarks>
        private static void RunCorrelatedQ()
        {
            var S       = new StateSet();
            var A       = new JointActionSet();
            var P       = new Transition(S, A);
            var Q_Joint = new CorrelatedQTable();
            var R       = new Rewards();

            // Step number -> absolute change of Q(initialState, S, X) caused by that step's update.
            var ERR = new Dictionary <int, double>();

            var j = 0;

            while (j <= 1000000)
            {
                //Initialize state according to Figure 4
                var initialState = new State(2, 1, BallPossessor.B);
                var done         = false;
                var currState    = initialState;

                var actions = A.GetNextJointAction();

                var playerAAction = actions.currplayer1Act;
                var playerBAction = actions.player2act;

                while (!done)
                {
                    // Progress indicator.
                    if (j % 20000 == 0)
                    {
                        Console.WriteLine(j);
                    }

                    // Snapshot of the tracked Q-value before this step's update.
                    var q_fig_4_initial = Q_Joint.getCurrPQvalue(initialState, Action.S, Action.X);

                    var nextState = P.GetNextState(currState, new JointAction(playerAAction, playerBAction));

                    var playerAReward = R.GetPlayerAReward(nextState);
                    var playerBReward = R.GetPlayerBReward(nextState);

                    // Any non-zero reward terminates the episode.
                    if (playerAReward != 0.0 || playerBReward != 0.0)
                    {
                        done = true;
                    }

                    Q_Joint.UpdateQValue(currState, nextState, playerAAction, playerBAction, playerAReward, playerBReward, done);

                    // Only steps that updated the tracked (state, action pair) entry are recorded.
                    if (currState.Equals(initialState) && playerAAction == Action.S && playerBAction == Action.X)
                    {
                        var q_fig_4 = Q_Joint.getCurrPQvalue(initialState, Action.S, Action.X);
                        var diff    = Math.Abs(q_fig_4 - q_fig_4_initial);
                        Console.WriteLine(diff);
                        ERR.Add(j, diff);
                    }

                    currState = nextState;

                    actions = A.GetNextJointAction();

                    playerAAction = actions.currplayer1Act;
                    playerBAction = actions.player2act;

                    ++j;
                }
            }

            // Culture-invariant formatting: with the current culture, a comma decimal separator
            // (e.g. de-DE) would produce lines such as "123,0,5" and break the two-column layout.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (StreamWriter sw = File.CreateText("output.csv"))
            {
                foreach (var kvp in ERR)
                {
                    sw.WriteLine(string.Format(invariant, "{0},{1}", kvp.Key, kvp.Value));
                }
            }
        }