/// <summary>
/// Solves the linear program that yields the mixed policy for state <paramref name="s"/>:
/// one non-negative probability decision per <see cref="Action"/>, a value decision v,
/// the simplex constraint (probabilities sum to 1), and — for each of player one's actions —
/// the constraint that the expected Q-value against that action is at most v.
/// Minimizing v under these constraints gives the opponent's minimizing mixture.
/// </summary>
/// <param name="s">State whose policy is being computed.</param>
/// <param name="q">Q-table queried for q(s, a1, a2) payoff entries.</param>
/// <returns>Mapping from each <see cref="Action"/> to its solved probability.</returns>
public Dictionary<Action, double> GetValue(State s, FriendQTable q)
{
    var context = SolverContext.GetContext();
    var model = context.CreateModel();

    // One probability decision per action; order matches the enum declaration order
    // (Enum.GetNames and Enum.GetValues enumerate in the same order).
    var actionDecisions = new List<Decision>();
    foreach (var actionName in Enum.GetNames(typeof(Action)))
    {
        var decision = new Decision(Domain.RealNonnegative, actionName);
        model.AddDecisions(decision);
        actionDecisions.Add(decision);
    }

    var valueDecision = new Decision(Domain.RealNonnegative, "value");
    model.AddDecisions(valueDecision);

    // Probabilities form a distribution. Built by accumulation so the constraint is
    // correct for any number of enum members, not a hard-coded five.
    Term probabilitySum = actionDecisions[0];
    for (int i = 1; i < actionDecisions.Count; ++i)
    {
        probabilitySum += actionDecisions[i];
    }
    model.AddConstraint("probSumConst", probabilitySum == 1.0);

    // For each of player one's actions: expected payoff of the mixture is bounded by v.
    int constraintCount = 0;
    foreach (Action playerOneAction in Enum.GetValues(typeof(Action)))
    {
        Term expectedPayoff = null;
        int column = 0;
        foreach (Action playerTwoAction in Enum.GetValues(typeof(Action)))
        {
            var weighted = q.getQval(s, playerOneAction, playerTwoAction) * actionDecisions[column];
            expectedPayoff = (expectedPayoff == null) ? weighted : expectedPayoff + weighted;
            ++column;
        }
        model.AddConstraint("Const" + constraintCount, expectedPayoff <= valueDecision);
        ++constraintCount;
    }

    model.AddGoal("MinimizeV", GoalKind.Minimize, valueDecision);
    context.Solve(new SimplexDirective());

    // Decision names were created from the enum names, so Enum.Parse recovers the action.
    var policy = new Dictionary<Action, double>();
    foreach (var decision in actionDecisions)
    {
        policy[(Action)Enum.Parse(typeof(Action), decision.Name)] = decision.GetDouble();
    }

    // SolverContext is shared; clear the model so the next solve starts fresh.
    context.ClearModel();
    return policy;
}
/// <summary>
/// Runs the Friend-Q learning experiment: repeatedly plays episodes on the soccer grid,
/// updates the Q-table from observed transitions, tracks how much the Q-value of the
/// "Figure 4" state/action pair (state (2,1) with B possessing, actions S vs X) changes
/// whenever that pair is visited, and writes the (step, |delta Q|) series to output.csv.
/// </summary>
private static void RunFriendQ()
{
    var states = new StateSet();
    var jointActions = new JointActionSet();
    var transition = new Transition(states, jointActions);
    var qTable = new FriendQTable();
    var rewards = new Rewards();
    var errorByStep = new Dictionary<int, double>();

    var step = 0;
    while (step <= 1000000)
    {
        // Initialize state according to Figure 4
        var figure4State = new State(2, 1, BallPossessor.B);
        var episodeOver = false;
        var state = new State(3, 1, BallPossessor.B);

        var joint = jointActions.GetNextJointAction();
        var actionA = joint.currplayer1Act;
        var actionB = joint.player2act;

        while (!episodeOver)
        {
            // Periodic progress output.
            if (step % 20000 == 0)
            {
                Console.WriteLine(step);
            }

            // Snapshot the tracked Q-value before the update so the change can be measured.
            var trackedQBefore = qTable.getQval(figure4State, Action.S, Action.X);

            var nextState = transition.GetNextState(state, new JointAction(actionA, actionB));
            var rewardA = rewards.GetPlayerAReward(nextState);
            var rewardB = rewards.GetPlayerBReward(nextState);

            qTable.UpdateQval(state, nextState, actionA, actionB, rewardA);

            // A non-zero reward for either player ends the episode.
            if (rewardA != 0.0 || rewardB != 0.0)
            {
                episodeOver = true;
            }

            // Record the convergence error only when the tracked state/action pair was visited.
            if (state.Equals(figure4State) && actionA == Action.S && actionB == Action.X)
            {
                var trackedQAfter = qTable.getQval(figure4State, Action.S, Action.X);
                errorByStep.Add(step, Math.Abs(trackedQAfter - trackedQBefore));
            }

            state = nextState;
            joint = jointActions.GetNextJointAction();
            actionA = joint.currplayer1Act;
            actionB = joint.player2act;
            ++step;
        }
    }

    // Dump the error series as "step,error" rows.
    using (StreamWriter writer = File.CreateText("output.csv"))
    {
        foreach (var entry in errorByStep)
        {
            writer.WriteLine(entry.Key + "," + entry.Value);
        }
    }
}