예제 #1
0
        /// <summary>
        /// Scores every action available in <paramref name="state"/> and returns the
        /// highest-scoring one. An action's score is the sum, over its possible next
        /// states, of probability * (reward + table value of the next state).
        /// </summary>
        /// <param name="problem">Supplies the available actions and the possible next states.</param>
        /// <param name="state">The state the action is chosen for.</param>
        /// <param name="valueTable">Source of the value of each next state.</param>
        /// <param name="rewarder">Source of the reward for each (state, next state, action) triple.</param>
        /// <returns>
        /// The action with the strictly greatest score (the earliest one wins ties), or
        /// <c>default(TAction)</c> when no action scores above <see cref="double.MinValue"/>,
        /// e.g. because no action is available.
        /// </returns>
        private static TAction FindBestAction(
            IProblem <TState, TAction> problem,
            TState state,
            ValueTable <TState, TAction> valueTable,
            IRewarder <TState, TAction> rewarder)
        {
            TAction bestAction = default(TAction);
            double bestScore = double.MinValue;

            foreach (var candidate in problem.AvailableActions(state))
            {
                double score = 0.0;

                foreach (var (successor, probability) in problem.PossibleStates(state, candidate))
                {
                    var successorValue = valueTable.Value(successor);
                    var immediateReward = rewarder.Reward(state, successor, candidate);

                    score += probability * (immediateReward + successorValue);
                }

                // Only a strictly better score replaces the current best; written as a
                // negated '>' so that a NaN score is skipped exactly like a lower one.
                if (!(score > bestScore))
                {
                    continue;
                }

                bestScore = score;
                bestAction = candidate;
            }

            return bestAction;
        }
예제 #2
0
        /// <summary>
        /// Sweeps over all states of the problem, overwriting each stored value with the
        /// result of <c>CalculateValue</c> for the given policy and rewarder. Sweeping
        /// stops once the largest absolute change seen in a sweep is not greater than
        /// 0.000001, or once <paramref name="sweepLimit"/> sweeps have been performed.
        /// </summary>
        /// <param name="policy">Policy passed through to <c>CalculateValue</c>.</param>
        /// <param name="rewarder">Rewarder passed through to <c>CalculateValue</c>.</param>
        /// <param name="sweepLimit">
        /// Maximum number of sweeps. Zero or a negative number disables the limit.
        /// </param>
        public void Evaluate(
            IPolicy <TState, TAction> policy,
            IRewarder <TState, TAction> rewarder,
            int sweepLimit = -1)
        {
            const double convergenceThreshold = 0.000001;
            var sweepsDone = 0;

            while (true)
            {
                var biggestDelta = 0.0;

                foreach (var state in _problem.AllStates())
                {
                    var previous = Value(state);
                    var updated  = CalculateValue(state, policy, rewarder);

                    // Values are updated in place, so later states in the same sweep
                    // already see this new value.
                    _values[state] = updated;

                    var delta = Math.Abs(previous - updated);
                    if (delta > biggestDelta)
                    {
                        biggestDelta = delta;
                    }
                }

                // Sweeps are only counted when a positive limit was requested.
                if (sweepLimit > 0)
                {
                    sweepsDone++;
                    if (sweepsDone == sweepLimit)
                    {
                        return;
                    }
                }

                if (!(biggestDelta > convergenceThreshold))
                {
                    return;
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Creates a block via <c>addBlock</c>, broadcasts it when a network broadcaster
        /// is set, replaces the pending-transaction list with an empty one, and then
        /// queues a signed reward transaction for that block.
        /// </summary>
        public void ProcessPendingTransactions()
        {
            Block minedBlock = addBlock();

            // Broadcasting is optional: nothing happens when no broadcaster is configured.
            NetworkBroadcaster?.BroadcastBlock(minedBlock);

            // Start a fresh pending list before the reward transaction is queued below.
            pendingTransactions = new LinkedList <Transaction>();

            IRewarder blockRewarder = MiningFactory.GetRewarder();
            int payout = blockRewarder.GetRewardForBlock(minedBlock);

            // The mining wallet's public key is passed as both of the first two
            // constructor arguments (presumably sender and recipient; confirm against Transaction).
            Transaction rewardTransaction = new Transaction(
                MiningWallet.PublicKey,
                MiningWallet.PublicKey,
                payout,
                TransactionHashFactory);

            rewardTransaction.SignTransaction(MiningWallet.Signer);
            AddNewTransaction(rewardTransaction);
        }
예제 #4
0
        /// <summary>
        /// Builds a greedy policy for <paramref name="problem"/> by recording, for every
        /// state, the action that <c>FindBestAction</c> selects from the given value
        /// table and rewarder.
        /// </summary>
        /// <param name="problem">Problem whose states are enumerated.</param>
        /// <param name="valueTable">Value table forwarded to <c>FindBestAction</c>.</param>
        /// <param name="rewarder">Rewarder forwarded to <c>FindBestAction</c>.</param>
        /// <returns>The newly created policy with one action stored per state.</returns>
        public static GreedyPolicy <TState, TAction> Create(
            IProblem <TState, TAction> problem,
            ValueTable <TState, TAction> valueTable,
            IRewarder <TState, TAction> rewarder)
        {
            var result = new GreedyPolicy <TState, TAction>(problem);

            foreach (var currentState in problem.AllStates())
            {
                result._actions[currentState] =
                    FindBestAction(problem, currentState, valueTable, rewarder);
            }

            return result;
        }
예제 #5
0
        /// <summary>
        /// Computes a new value for <paramref name="state"/>: the sum, over every
        /// available action and every possible next state of that action, of
        /// P(action) * P(next state) * (reward + current value of the next state).
        /// </summary>
        /// <param name="state">State to compute the value for.</param>
        /// <param name="policy">Supplies the probability of each action in the state.</param>
        /// <param name="rewarder">Supplies the reward of each transition.</param>
        /// <returns>The accumulated sum; 0.0 when there is nothing to sum over.</returns>
        private double CalculateValue(
            TState state,
            IPolicy <TState, TAction> policy,
            IRewarder <TState, TAction> rewarder)
        {
            var total = 0.0;

            foreach (var candidate in _problem.AvailableActions(state))
            {
                foreach (var (successor, probability) in _problem.PossibleStates(state, candidate))
                {
                    var immediateReward = rewarder.Reward(state, successor, candidate);

                    // Weight of this transition: chance of picking the action times
                    // chance of landing in the successor state.
                    var weight = policy.PAction(state, candidate) * probability;
                    var outcome = immediateReward + Value(successor);

                    total += weight * outcome;
                }
            }

            return total;
        }
예제 #6
0
        // Searches for a policy that no longer changes under greedy improvement:
        // alternates between evaluating the current policy into a value table and
        // deriving a new greedy policy from that table, for at most 100 iterations.
        //
        // problem:  the problem whose states/actions are evaluated.
        // rewarder: reward source handed to evaluation and greedy-policy creation.
        // evaluationSweepsPerPolicyUpdate: sweep limit passed to every Evaluate call.
        //
        // Returns the tuple (greedyPolicy, values).
        // NOTE(review): the return type and modifiers of this declaration are not
        // visible in this excerpt.
        FindOptimalPolicy <TState, TAction>(
            IProblem <TState, TAction> problem,
            IRewarder <TState, TAction> rewarder,
            int evaluationSweepsPerPolicyUpdate = 1)
            where TState : struct
            where TAction : struct
        {
            const int maxIterations = 100;
            var       values        = new ValueTable <TState, TAction>(problem);
            IPolicy <TState, TAction> initialPolicy = new UniformRandomPolicy <TState, TAction>(problem);

            // Bootstrap: evaluate the uniform random policy, then take the greedy
            // policy with respect to the resulting values as the starting point.
            values.Evaluate(initialPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
            var greedyPolicy = GreedyPolicy <TState, TAction> .Create(problem, values, rewarder);

            for (var i = 0; i < maxIterations; i++)
            {
                // Re-evaluate the current greedy policy (same value table is reused
                // and updated across iterations).
                values.Evaluate(greedyPolicy, rewarder, evaluationSweepsPerPolicyUpdate);

                var newGreedyPolicy = GreedyPolicy <TState, TAction> .Create(problem, values, rewarder);

                // Stop as soon as improvement yields the same actions as before.
                // greedyPolicy is left unchanged here; it has the same actions as
                // newGreedyPolicy according to HasSameActionsAs.
                if (newGreedyPolicy.HasSameActionsAs(greedyPolicy))
                {
                    Console.WriteLine($"Found optimal policy at iteration {i}");
                    break;
                }

                greedyPolicy = newGreedyPolicy;

                // Only reached on the final iteration when the policy still changed.
                if (i == maxIterations - 1)
                {
                    Console.WriteLine($"Policy iteration did not converge by iteration {i}");
                }
            }

            return(greedyPolicy, values);
        }
예제 #7
0
 /// <summary>
 /// Records one more defeated enemy and increases the score by the amount
 /// reported by <paramref name="reward"/>.
 /// </summary>
 /// <param name="reward">Provides the score to add via <c>GetRewardScore</c>.</param>
 public void OnEnemyDeath(IRewarder reward)
 {
     ++enemiesDefeated;

     var earnedScore = reward.GetRewardScore();
     IncreaseScore(earnedScore);
 }
예제 #8
0
 /// <summary>
 /// Registers <paramref name="applier"/> in <c>rewarders</c> under the key
 /// <c>typeof(T)</c>.
 /// </summary>
 /// <typeparam name="T">The reward type the rewarder is registered for.</typeparam>
 /// <param name="applier">The rewarder to store for <typeparamref name="T"/>.</param>
 public void AddRewarder <T>(IRewarder <T> applier) where T : Reward
     => rewarders.Add(typeof(T), applier);