/// <summary>
/// Adds the Correlated-Q objective terms for state <paramref name="s"/>:
/// for every joint action (a1, a2), the term pi(a1,a2) * (Q1 + Q2), i.e. the
/// sum of both players' expected values under the joint policy.
/// </summary>
/// <param name="s">State whose Q-values are read.</param>
/// <param name="q">Joint Q-table providing both players' Q-values.</param>
/// <param name="actDecisions">Solver decision (probability) per joint action.</param>
/// <param name="objectSum">Builder the objective terms are appended to.</param>
private static void ObjFunctermAdd(State s, CorrelatedQTable q, Dictionary<Tuple<Action, Action>, Decision> actDecisions, SumTermBuilder objectSum)
{
    // The original code ran two identical loop nests, one per Q-table.
    // A single pass adding pi * (Q1 + Q2) builds an algebraically identical
    // linear objective with half the iterations.
    foreach (Action currentAction in Enum.GetValues(typeof(Action)))
    {
        foreach (Action player2Action in Enum.GetValues(typeof(Action)))
        {
            var policy = getActDecision(currentAction, player2Action, actDecisions);
            var qValue = q.getCurrPQvalue(s, currentAction, player2Action);
            var player2Qv = q.getPlayer2Qval(s, currentAction, player2Action);
            // pi * Q1 + pi * Q2 collapsed into one term per joint action.
            objectSum.Add(policy * (qValue + player2Qv));
        }
    }
}
/// <summary>
/// Adds the Correlated-Q objective terms for state <paramref name="s"/>:
/// for every joint action (a1, a2), the term pi(a1,a2) * (Q1 + Q2), i.e. the
/// sum of both players' expected values under the joint policy.
/// </summary>
/// <param name="s">State whose Q-values are read.</param>
/// <param name="q">Joint Q-table providing both players' Q-values.</param>
/// <param name="actionDecisions">Solver decision (probability) per joint action.</param>
/// <param name="objectiveSum">Builder the objective terms are appended to.</param>
private static void AddObjectiveFunctionTerms(State s, CorrelatedQTable q, Dictionary<Tuple<Action, Action>, Decision> actionDecisions, SumTermBuilder objectiveSum)
{
    // The original code ran two identical loop nests, one per Q-table.
    // A single pass adding pi * (Q1 + Q2) builds an algebraically identical
    // linear objective with half the iterations.
    foreach (Action currentAction in Enum.GetValues(typeof(Action)))
    {
        foreach (Action opponentAction in Enum.GetValues(typeof(Action)))
        {
            var pi = GetActionDecision(currentAction, opponentAction, actionDecisions);
            var qValue = q.GetCurrentPlayerQValue(s, currentAction, opponentAction);
            var opponentQValue = q.GetOpponentQValue(s, currentAction, opponentAction);
            // pi * Q1 + pi * Q2 collapsed into one term per joint action.
            objectiveSum.Add(pi * (qValue + opponentQValue));
        }
    }
}
/// <summary>
/// Solves the correlated-equilibrium LP for state <paramref name="s"/>:
/// a probability decision per joint action, a sum-to-one constraint,
/// rationality constraints for both players, and an objective maximizing the
/// sum of both players' expected Q-values.
/// </summary>
/// <param name="s">State to evaluate.</param>
/// <param name="q">Joint Q-table supplying both players' Q-values.</param>
/// <returns>
/// (player 1 expected value, player 2 expected value) under the solved joint
/// policy, or (1.0, 1.0) when the solver does not reach an optimal solution.
/// </returns>
public static Tuple<double, double> GetValue(State s, CorrelatedQTable q)
{
    var contxt = SolverContext.GetContext();
    contxt.ClearModel();
    var model = contxt.CreateModel();

    // Enumerate the action set once and derive the joint-action count from it
    // instead of hard-coding 25 (which silently assumed exactly 5 actions).
    var allActions = (Action[])Enum.GetValues(typeof(Action));
    var jointActionCount = allActions.Length * allActions.Length;

    // One non-negative probability decision per joint action.
    var actDecisions = new Dictionary<Tuple<Action, Action>, Decision>(jointActionCount);
    foreach (Action currentAction in allActions)
    {
        foreach (Action player2Action in allActions)
        {
            var decision = new Decision(Domain.RealNonnegative, currentAction.ToString() + player2Action.ToString());
            model.AddDecisions(decision);
            actDecisions.Add(new Tuple<Action, Action>(currentAction, player2Action), decision);
        }
    }

    // Probabilities must form a distribution over joint actions.
    var actDecisionSum = new SumTermBuilder(jointActionCount);
    foreach (var decision in actDecisions.Values)
    {
        actDecisionSum.Add(decision);
    }
    model.AddConstraint("probSumConst", actDecisionSum.ToTerm() == 1.0);

    // Rationality constraints for the current player ("A") and player 2 ("B").
    rationalconsts(s, q.getCurrPQvalue, actDecisions, model, "A");
    rationalityConstrPlayer2(s, q.getPlayer2Qval, actDecisions, model, "B");

    // Objective: both players' Q-terms; capacity hint sized to the actual
    // number of terms (the previous hint of 10 undersized the builder).
    var objectSum = new SumTermBuilder(2 * jointActionCount);
    //Add my terms from my Q table to objective function
    ObjFunctermAdd(s, q, actDecisions, objectSum);
    model.AddGoal("MaximizeV", GoalKind.Maximize, objectSum.ToTerm());

    var sol = contxt.Solve(new SimplexDirective());
    if (sol.Quality != SolverQuality.Optimal)
    {
        contxt.ClearModel();
        // Fallback values when the LP has no optimal solution.
        return new Tuple<double, double>(1.0, 1.0);
    }

    // Expected value per player under the solved joint policy.
    double Player1nextVal = 0.0;
    double Player2nextVal = 0.0;
    foreach (Action currentAction in allActions)
    {
        foreach (Action player2Action in allActions)
        {
            var policy = getActDecision(currentAction, player2Action, actDecisions);
            var probability = policy.ToDouble(); // read the decision once per joint action
            Player1nextVal += probability * q.getCurrPQvalue(s, currentAction, player2Action);
            Player2nextVal += probability * q.getPlayer2Qval(s, currentAction, player2Action);
        }
    }
    return new Tuple<double, double>(Player1nextVal, Player2nextVal);
}
/// <summary>
/// Solves the correlated-equilibrium LP for state <paramref name="s"/>:
/// a probability decision per joint action, a sum-to-one constraint,
/// rationality constraints for both players, and an objective maximizing the
/// sum of both players' expected Q-values.
/// </summary>
/// <param name="s">State to evaluate.</param>
/// <param name="q">Joint Q-table supplying both players' Q-values.</param>
/// <returns>
/// (current player expected value, opponent expected value) under the solved
/// joint policy, or (1.0, 1.0) when the solver does not reach an optimal solution.
/// </returns>
public static Tuple<double, double> GetValue(State s, CorrelatedQTable q)
{
    var context = SolverContext.GetContext();
    context.ClearModel();
    var model = context.CreateModel();

    // Enumerate the action set once and derive the joint-action count from it
    // instead of hard-coding 25 (which silently assumed exactly 5 actions).
    var allActions = (Action[])Enum.GetValues(typeof(Action));
    var jointActionCount = allActions.Length * allActions.Length;

    // One non-negative probability decision per joint action.
    var actionDecisions = new Dictionary<Tuple<Action, Action>, Decision>(jointActionCount);
    foreach (Action currentAction in allActions)
    {
        foreach (Action opponentAction in allActions)
        {
            var decision = new Decision(Domain.RealNonnegative, currentAction.ToString() + opponentAction.ToString());
            model.AddDecisions(decision);
            actionDecisions.Add(new Tuple<Action, Action>(currentAction, opponentAction), decision);
        }
    }

    // Probabilities must form a distribution over joint actions.
    var actionDecisionSum = new SumTermBuilder(jointActionCount);
    foreach (var decision in actionDecisions.Values)
    {
        actionDecisionSum.Add(decision);
    }
    model.AddConstraint("probSumConst", actionDecisionSum.ToTerm() == 1.0);

    // Rationality constraints for the current player ("A") and the opponent ("B").
    SetupRationalityConstraints(s, q.GetCurrentPlayerQValue, actionDecisions, model, "A");
    SetupRationalityConstraintsOpponent(s, q.GetOpponentQValue, actionDecisions, model, "B");

    // Objective: both players' Q-terms; capacity hint sized to the actual
    // number of terms (the previous hint of 10 undersized the builder).
    var objectiveSum = new SumTermBuilder(2 * jointActionCount);
    //Add my terms from my Q table to objective function
    AddObjectiveFunctionTerms(s, q, actionDecisions, objectiveSum);
    model.AddGoal("MaximizeV", GoalKind.Maximize, objectiveSum.ToTerm());

    var solution = context.Solve(new SimplexDirective());
    //Console.WriteLine(solution.GetReport());
    if (solution.Quality != SolverQuality.Optimal)
    {
        context.ClearModel();
        // Fallback values when the LP has no optimal solution.
        return new Tuple<double, double>(1.0, 1.0);
    }

    // Expected value per player under the solved joint policy.
    double currentPlayerNextValue = 0.0;
    double opponentNextValue = 0.0;
    foreach (Action currentAction in allActions)
    {
        foreach (Action opponentAction in allActions)
        {
            var pi = GetActionDecision(currentAction, opponentAction, actionDecisions);
            var probability = pi.ToDouble(); // read the decision once per joint action
            currentPlayerNextValue += probability * q.GetCurrentPlayerQValue(s, currentAction, opponentAction);
            opponentNextValue += probability * q.GetOpponentQValue(s, currentAction, opponentAction);
        }
    }
    return new Tuple<double, double>(currentPlayerNextValue, opponentNextValue);
}
/// <summary>
/// Trains the Correlated-Q agent over repeated episodes and records, for the
/// monitored state/action pair (the Figure-4 configuration with joint action
/// (S, X)), the absolute Q-value change caused by each update that touched it.
/// The recorded errors are written to output.csv as "step,diff" lines.
/// </summary>
private static void RunCorrelatedQ()
{
    var stateSet = new StateSet();
    var jointActionSet = new JointActionSet();
    var transition = new Transition(stateSet, jointActionSet);
    var qTable = new CorrelatedQTable();
    var rewards = new Rewards();
    var errorByStep = new Dictionary<int, double>();

    var step = 0;
    while (step <= 1000000)
    {
        //Initialize state according to Figure 4
        var initialState = new State(2, 1, BallPossessor.B);
        var currentState = initialState;
        var episodeOver = false;
        var joint = jointActionSet.GetNextJointAction();
        var actionA = joint.currplayer1Act;
        var actionB = joint.player2act;

        while (!episodeOver)
        {
            // Periodic progress output.
            if (step % 20000 == 0)
            {
                Console.WriteLine(step);
            }

            // Snapshot the monitored Q-value before this update so the change
            // it causes can be measured afterwards.
            var qBefore = qTable.getCurrPQvalue(initialState, Action.S, Action.X);

            var nextState = transition.GetNextState(currentState, new JointAction(actionA, actionB));
            var rewardA = rewards.GetPlayerAReward(nextState);
            var rewardB = rewards.GetPlayerBReward(nextState);
            // Any non-zero reward ends the episode.
            episodeOver = rewardA != 0.0 || rewardB != 0.0;

            qTable.UpdateQValue(currentState, nextState, actionA, actionB, rewardA, rewardB, episodeOver);

            // Only record the error when this step actually updated the monitored pair.
            if (currentState.Equals(initialState) && actionA == Action.S && actionB == Action.X)
            {
                var qAfter = qTable.getCurrPQvalue(initialState, Action.S, Action.X);
                var diff = Math.Abs(qAfter - qBefore);
                Console.WriteLine(diff);
                errorByStep.Add(step, diff);
            }

            currentState = nextState;
            joint = jointActionSet.GetNextJointAction();
            actionA = joint.currplayer1Act;
            actionB = joint.player2act;
            ++step;
        }
    }

    using (StreamWriter sw = File.CreateText("output.csv"))
    {
        foreach (var kvp in errorByStep)
        {
            sw.WriteLine(kvp.Key + "," + kvp.Value);
        }
    }
}