Exemplo n.º 1
0
            /// <summary>
            /// Appends objective-function terms for state <paramref name="s"/> to
            /// <paramref name="objectiveSum"/>: for every (current, opponent) action pair, the pair's
            /// decision multiplied by the current player's Q-value, followed by the pair's decision
            /// multiplied by the opponent's Q-value.
            /// </summary>
            /// <param name="s">State whose Q-values are looked up.</param>
            /// <param name="q">Table queried for the current player's and the opponent's Q-values.</param>
            /// <param name="actionDecisions">Decisions keyed by (current action, opponent action).</param>
            /// <param name="objectiveSum">Builder that receives the generated terms.</param>
            private static void AddObjectiveFunctionTerms(State s, CorrelatedQTable q, Dictionary <Tuple <Action, Action>, Decision> actionDecisions, SumTermBuilder objectiveSum)
            {
                // Enumerate the action values once and reuse them for both passes.
                var actions = Enum.GetValues(typeof(Action));

                // Pass 1: every pair weighted by the current player's Q-value.
                foreach (Action mine in actions)
                {
                    foreach (Action theirs in actions)
                    {
                        var weight = GetActionDecision(mine, theirs, actionDecisions);
                        objectiveSum.Add(weight * q.GetCurrentPlayerQValue(s, mine, theirs));
                    }
                }

                // Pass 2: every pair weighted by the opponent's Q-value. Kept as a separate pass so
                // the terms are appended in the same order as before.
                foreach (Action mine in actions)
                {
                    foreach (Action theirs in actions)
                    {
                        var weight = GetActionDecision(mine, theirs, actionDecisions);
                        objectiveSum.Add(weight * q.GetOpponentQValue(s, mine, theirs));
                    }
                }
            }
Exemplo n.º 2
0
            /// <summary>
            /// Builds and solves a linear model over the joint (current, opponent) actions for state
            /// <paramref name="s"/> and returns the resulting values for both players.
            /// </summary>
            /// <remarks>
            /// One non-negative real decision is created per action pair and all decisions are
            /// constrained to sum to 1. Additional constraints are added by
            /// <c>SetupRationalityConstraints</c> and <c>SetupRationalityConstraintsOpponent</c>.
            /// The goal maximizes the terms produced by <c>AddObjectiveFunctionTerms</c>.
            /// The shared solver context's model is cleared before the new model is built.
            /// </remarks>
            /// <param name="s">State whose Q-values are used.</param>
            /// <param name="q">Table queried for the current player's and the opponent's Q-values.</param>
            /// <returns>
            /// A tuple of (current player value, opponent value), each being the sum over all action
            /// pairs of the solved decision value times the respective Q-value. If the solver does not
            /// report an optimal solution, the model is cleared and (1.0, 1.0) is returned.
            /// </returns>
            public static Tuple <double, double> GetValue(State s, CorrelatedQTable q)
            {
                var solverContext = SolverContext.GetContext();

                // Start from a clean slate: drop whatever model the shared context currently holds.
                solverContext.ClearModel();
                var lp = solverContext.CreateModel();

                var actions         = Enum.GetValues(typeof(Action));
                var actionDecisions = new Dictionary <Tuple <Action, Action>, Decision>();

                // One non-negative decision per joint action, named by concatenating both action names.
                foreach (Action mine in actions)
                {
                    foreach (Action theirs in actions)
                    {
                        var jointDecision = new Decision(Domain.RealNonnegative, mine.ToString() + theirs.ToString());
                        lp.AddDecisions(jointDecision);
                        actionDecisions.Add(Tuple.Create(mine, theirs), jointDecision);
                    }
                }

                // All joint-action decisions must sum to exactly 1.
                var decisionTotal = new SumTermBuilder(25);

                foreach (var jointDecision in actionDecisions.Values)
                {
                    decisionTotal.Add(jointDecision);
                }

                lp.AddConstraint("probSumConst", decisionTotal.ToTerm() == 1.0);

                SetupRationalityConstraints(s, q.GetCurrentPlayerQValue, actionDecisions, lp, "A");
                SetupRationalityConstraintsOpponent(s, q.GetOpponentQValue, actionDecisions, lp, "B");

                // Objective: terms from both players' Q-values, maximized.
                var objectiveSum = new SumTermBuilder(10);
                AddObjectiveFunctionTerms(s, q, actionDecisions, objectiveSum);
                lp.AddGoal("MaximizeV", GoalKind.Maximize, objectiveSum.ToTerm());

                var solution = solverContext.Solve(new SimplexDirective());

                if (solution.Quality != SolverQuality.Optimal)
                {
                    // No optimal solution: discard the model and return the fixed fallback pair.
                    solverContext.ClearModel();
                    return Tuple.Create(1.0, 1.0);
                }

                double currentPlayerTotal = 0.0;
                double opponentTotal      = 0.0;

                // Weight each player's Q-value by the solved value of the matching decision.
                foreach (Action mine in actions)
                {
                    foreach (Action theirs in actions)
                    {
                        double solved = GetActionDecision(mine, theirs, actionDecisions).ToDouble();

                        currentPlayerTotal += solved * q.GetCurrentPlayerQValue(s, mine, theirs);
                        opponentTotal      += solved * q.GetOpponentQValue(s, mine, theirs);
                    }
                }

                return Tuple.Create(currentPlayerTotal, opponentTotal);
            }