Exemplo n.º 1
0
        // todo: change this to value iteration (interleave evaluation and improvement)
        /// <summary>
        /// Repeatedly sweeps over every state of the world, replacing each state's stored
        /// value with the value computed for it under <paramref name="policy"/>, until the
        /// values stop changing or the sweep limit is reached.
        /// </summary>
        /// <param name="policy">Policy passed through to the per-state value calculation.</param>
        /// <param name="rewarder">Rewarder passed through to the per-state value calculation.</param>
        /// <param name="sweepLimit">
        /// Maximum number of full sweeps to perform. Any value that is not positive
        /// (including the default of -1) disables the limit.
        /// </param>
        /// <remarks>
        /// Updates are written in place: a state's new value is stored immediately, before
        /// the remaining states of the same sweep are processed.
        /// </remarks>
        public void Evaluate(IGamblersPolicy policy, IGamblersWorldRewarder rewarder, int sweepLimit = -1)
        {
            // A sweep whose largest per-state change is at or below this is treated as converged.
            const double convergenceThreshold = 0.000001;

            var    sweepsDone = 0;
            double maxDelta;

            do
            {
                maxDelta = 0.0;

                foreach (var state in _world.AllStates())
                {
                    // Read the old value before overwriting it so the change can be measured.
                    var before = Value(state);
                    var after  = CalculateValue(state, policy, rewarder);

                    _values[state.DollarsInHand] = after;

                    var delta = Math.Abs(before - after);
                    maxDelta = delta > maxDelta ? delta : maxDelta;
                }

                // Sweeps are only counted when a positive limit was requested.
                if (sweepLimit > 0)
                {
                    sweepsDone++;

                    if (sweepsDone == sweepLimit)
                    {
                        return;
                    }
                }
            } while (maxDelta > convergenceThreshold);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Plays a fixed number of games (100) from each starting amount of dollars, from 1
        /// up to one less than the number of states, and prints to the console how many of
        /// those games were wins.
        /// </summary>
        /// <remarks>
        /// The printed table also includes a row for $0, which is never played and therefore
        /// always reports 0 wins.
        /// NOTE(review): the highest starting amount equals the state count minus one; if
        /// that state is the terminal "won" state, games are being played from it too.
        /// Confirm this is intended.
        /// </remarks>
        public void Play()
        {
            const int gamesPerStart = 100;

            var stateCount  = _world.AllStates().Count();
            var winsByStart = new int[stateCount];

            for (var dollars = 1; dollars < stateCount; dollars++)
            {
                var start     = new GamblersWorldState(dollars);
                var winsSoFar = 0;
                var gamesLeft = gamesPerStart;

                while (gamesLeft-- > 0)
                {
                    if (_world.IsWin(PlaySingleGame(start)))
                    {
                        winsSoFar++;
                    }
                }

                winsByStart[dollars] = winsSoFar;
            }

            Console.WriteLine("num wins:");

            // Walk the tally in index order; the index is the starting amount in dollars.
            var rowDollars = 0;
            foreach (var winCount in winsByStart)
            {
                Console.WriteLine($"${rowDollars}: {winCount} wins");
                rowDollars++;
            }
        }
Exemplo n.º 3
0
 /// <summary>
 /// Writes one console line per state of <paramref name="world"/>, in the form
 /// "state: action", where the action is the one <paramref name="policy"/> selects
 /// for that state.
 /// </summary>
 /// <param name="world">World whose states are enumerated.</param>
 /// <param name="policy">Policy queried for the action of each state.</param>
 private static void PrintPolicyActions(
     GamblersWorld world,
     IDeterministicPolicy<GamblersWorldState, GamblersWorldAction> policy)
 {
     foreach (var gamblerState in world.AllStates())
     {
         var chosenAction = policy.Action(gamblerState);

         Console.WriteLine($"{gamblerState}: {chosenAction}");
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Writes one console line per state of <paramref name="world"/>, in the form
 /// "state: value", where the value is looked up in <paramref name="values"/>.
 /// </summary>
 /// <param name="world">World whose states are enumerated.</param>
 /// <param name="values">Value table queried for each state.</param>
 private static void PrintAllValues(
     GamblersWorld world,
     ValueTable<GamblersWorldState, GamblersWorldAction> values)
 {
     foreach (var gamblerState in world.AllStates())
     {
         var stateValue = values.Value(gamblerState);

         Console.WriteLine($"{gamblerState}: {stateValue}");
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Builds a policy that, for every state of <paramref name="world"/>, stores the
        /// stake of the action returned by <c>FindBestAction</c> for that state.
        /// </summary>
        /// <param name="world">World whose states the policy covers.</param>
        /// <param name="valueTable">Value table passed through to the best-action search.</param>
        /// <param name="rewarder">Rewarder passed through to the best-action search.</param>
        /// <returns>A new policy with one stake recorded per state.</returns>
        public static GreedyGamblersPolicy Create(
            GamblersWorld world,
            GamblersValueTable valueTable,
            IGamblersWorldRewarder rewarder)
        {
            var policy = new GreedyGamblersPolicy(world);
            var stakes = policy._actions;

            foreach (var current in world.AllStates())
            {
                // Stakes are indexed by the number of dollars held in the state.
                var best = FindBestAction(world, current, valueTable, rewarder);
                stakes[current.DollarsInHand] = best.Stake;
            }

            return policy;
        }
Exemplo n.º 6
0
 /// <summary>
 /// Creates a value table for <paramref name="world"/> with one entry per state.
 /// Every entry starts at 0.0 except the entry at index <c>DollarsToWin</c>,
 /// which starts at 1.0.
 /// </summary>
 /// <param name="world">World whose states the table covers.</param>
 public GamblersValueTable(GamblersWorld world)
 {
     var stateCount    = world.AllStates().Count();
     var initialValues = new double[stateCount];

     initialValues[world.DollarsToWin] = 1.0;

     _world  = world;
     _values = initialValues;
 }
Exemplo n.º 7
0
 /// <summary>
 /// Creates a policy with one action slot per state of <paramref name="world"/>.
 /// Every slot starts at 0, the default for a new <c>int</c> array.
 /// </summary>
 /// <param name="world">World used only to determine how many states there are.</param>
 private GreedyGamblersPolicy(GamblersWorld world)
 {
     var stateCount = world.AllStates().Count();

     _actions = new int[stateCount];
 }