public Dictionary<Action, double> GetValue(State s, FriendQTable q)
{
    // Requires Microsoft Solver Foundation (Microsoft.SolverFoundation.Services).
    var context = SolverContext.GetContext();
    var model = context.CreateModel();

    // One non-negative probability decision per action, plus the value V of the state.
    var actionDecisions = new List<Decision>();
    foreach (var action in Enum.GetNames(typeof(Action)))
    {
        var decision = new Decision(Domain.RealNonnegative, action);
        model.AddDecisions(decision);
        actionDecisions.Add(decision);
    }

    var valueDecision = new Decision(Domain.RealNonnegative, "value");
    model.AddDecisions(valueDecision);

    // The five action probabilities must sum to one.
    model.AddConstraint("probSumConst",
        actionDecisions[0] + actionDecisions[1] + actionDecisions[2] +
        actionDecisions[3] + actionDecisions[4] == 1.0);

    // For each of the first player's actions, the expected Q-value under the
    // mixed strategy over the second player's actions is bounded above by V.
    int constCount = 0;
    foreach (Action playerOneAction in Enum.GetValues(typeof(Action)))
    {
        var qConstraintValues = new List<double>();
        foreach (Action playerTwoAction in Enum.GetValues(typeof(Action)))
        {
            qConstraintValues.Add(q.GetQValue(s, playerOneAction, playerTwoAction));
        }

        model.AddConstraint("Const" + constCount,
            qConstraintValues[0] * actionDecisions[0] +
            qConstraintValues[1] * actionDecisions[1] +
            qConstraintValues[2] * actionDecisions[2] +
            qConstraintValues[3] * actionDecisions[3] +
            qConstraintValues[4] * actionDecisions[4] <= valueDecision);
        ++constCount;
    }

    // Minimize V subject to the constraints above and solve with the simplex method.
    model.AddGoal("MinimizeV", GoalKind.Minimize, valueDecision);
    context.Solve(new SimplexDirective());

    // Read the solved probabilities back out as the policy pi(s).
    var pi_s = new Dictionary<Action, double>();
    foreach (var actionDec in actionDecisions)
    {
        pi_s[(Action)Enum.Parse(typeof(Action), actionDec.Name)] = actionDec.GetDouble();
    }

    context.ClearModel();
    return pi_s;
}
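The dictionary returned by GetValue is a probability distribution over actions, so during a rollout it would be sampled rather than maximized. A minimal sketch of such sampling, assuming the five-action Action enum above; SampleAction and Rng are illustrative names and not part of the code in this post:

private static readonly Random Rng = new Random();

private static Action SampleAction(Dictionary<Action, double> pi_s)
{
    // Draw a uniform number and walk the cumulative distribution.
    var draw = Rng.NextDouble();
    var cumulative = 0.0;
    var chosen = Action.X; // overwritten on the first iteration; guards round-off
    foreach (var kvp in pi_s)
    {
        chosen = kvp.Key;
        cumulative += kvp.Value;
        if (draw <= cumulative)
        {
            break;
        }
    }
    return chosen;
}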
private static void RunFriendQ()
{
    var S = new StateSet();
    var A = new JointActionSet();
    var P = new Transition(S, A);
    var Q_A = new FriendQTable();
    var R = new Rewards();
    var ERR = new Dictionary<int, double>();

    var j = 0;
    while (j <= 1000000)
    {
        // Initialize the episode to the starting configuration from Figure 4.
        var initialState = new State(2, 1, BallPossessor.B);
        var done = false;
        var currState = new State(3, 1, BallPossessor.B);

        var actions = A.GetNextJointAction();
        var playerAAction = actions.currplayer1Act;
        var playerBAction = actions.player2act;

        while (!done)
        {
            if (j % 20000 == 0)
            {
                Console.WriteLine(j);
            }

            // Snapshot the monitored Q-value before the update so its change can be measured.
            var q_fig_4_initial = Q_A.GetQValue(initialState, Action.S, Action.X);

            var nextState = P.GetNextState(currState, new JointAction(playerAAction, playerBAction));
            var playerAReward = R.GetPlayerAReward(nextState);
            var playerBReward = R.GetPlayerBReward(nextState);

            Q_A.UpdateQval(currState, nextState, playerAAction, playerBAction, playerAReward);

            // A non-zero reward means a goal was scored and the episode ends.
            if (playerAReward != 0.0 || playerBReward != 0.0)
            {
                done = true;
            }

            // Record the error whenever the monitored state-action pair was just visited.
            if (currState.Equals(initialState) && playerAAction == Action.S && playerBAction == Action.X)
            {
                var q_fig_4 = Q_A.GetQValue(initialState, Action.S, Action.X);
                var diff = Math.Abs(q_fig_4 - q_fig_4_initial);
                ERR.Add(j, diff);
            }

            currState = nextState;
            actions = A.GetNextJointAction();
            playerAAction = actions.currplayer1Act;
            playerBAction = actions.player2act;
            ++j;
        }
    }

    // Dump the recorded Q-value differences for plotting.
    using (StreamWriter sw = File.CreateText("output.csv"))
    {
        foreach (var kvp in ERR)
        {
            sw.WriteLine(kvp.Key + "," + kvp.Value);
        }
    }
}
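The training loop leans on FriendQTable.UpdateQval, whose body is not shown here. A plausible sketch of the update it performs, assuming a learning rate alpha, a discount gamma, and a SetQValue helper (all hypothetical names); Friend-Q values the next state by the best joint action for the learner:

// Hypothetical sketch of the update the loop relies on; alpha, gamma, and SetQValue are assumptions.
public void UpdateQval(State s, State sPrime, Action a, Action o, double reward)
{
    // Value of the next state: the maximum Q over all joint actions (the "friend" assumption).
    var maxNext = double.MinValue;
    foreach (Action aPrime in Enum.GetValues(typeof(Action)))
    {
        foreach (Action oPrime in Enum.GetValues(typeof(Action)))
        {
            maxNext = Math.Max(maxNext, GetQValue(sPrime, aPrime, oPrime));
        }
    }

    // Standard temporal-difference blend of the old estimate and the bootstrapped target.
    var target = reward + gamma * maxNext;
    var updated = (1.0 - alpha) * GetQValue(s, a, o) + alpha * target;
    SetQValue(s, a, o, updated);
}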