/// <summary>
/// Scans every action available in <paramref name="state"/> and returns the one
/// whose weighted sum of (reward + successor value) over its possible outcomes
/// is largest.
/// </summary>
/// <param name="problem">Supplies the available actions and the possible outcomes of each.</param>
/// <param name="state">The state for which an action is being chosen.</param>
/// <param name="valueTable">Source of the current value estimate for each successor state.</param>
/// <param name="rewarder">Source of the reward for each (state, successor, action) triple.</param>
/// <returns>
/// The first action whose score is strictly greater than every earlier one.
/// <c>default(TAction)</c> is returned when there are no actions, or when no
/// score compares greater than <see cref="double.MinValue"/>.
/// </returns>
private static TAction FindBestAction(
    IProblem<TState, TAction> problem,
    TState state,
    ValueTable<TState, TAction> valueTable,
    IRewarder<TState, TAction> rewarder)
{
    var bestAction = default(TAction);
    var bestScore = double.MinValue;

    foreach (var candidate in problem.AvailableActions(state))
    {
        // Accumulate the candidate's score over every outcome it can lead to;
        // each outcome carries a weight (presumably its transition probability).
        var score = 0.0;
        foreach (var (successor, weight) in problem.PossibleStates(state, candidate))
        {
            var successorValue = valueTable.Value(successor);
            var reward = rewarder.Reward(state, successor, candidate);
            score += weight * (reward + successorValue);
        }

        // Strict comparison: ties keep the earlier action, and a NaN score never wins.
        if (score > bestScore)
        {
            bestScore = score;
            bestAction = candidate;
        }
    }

    return bestAction;
}
/// <summary>
/// Repeatedly sweeps over all states, overwriting each stored value with the
/// value computed for it under <paramref name="policy"/>, until a sweep changes
/// no value by more than 0.000001 or the sweep limit is reached.
/// </summary>
/// <param name="policy">Policy whose values are being computed.</param>
/// <param name="rewarder">Reward source passed through to the per-state calculation.</param>
/// <param name="sweepLimit">
/// Maximum number of sweeps to run. Values of zero or less (the default is -1)
/// disable the limit, so only the change threshold stops the loop.
/// </param>
public void Evaluate(
    IPolicy<TState, TAction> policy,
    IRewarder<TState, TAction> rewarder,
    int sweepLimit = -1)
{
    const double changeThreshold = 0.000001;
    var sweepsDone = 0;

    while (true)
    {
        // Largest absolute change made to any state's value during this sweep.
        var biggestDelta = 0.0;

        foreach (var state in _problem.AllStates())
        {
            var before = Value(state);
            var after = CalculateValue(state, policy, rewarder);

            // Values are updated in place, so later states in the same sweep
            // already see this new value.
            _values[state] = after;

            var delta = Math.Abs(before - after);
            if (delta > biggestDelta)
            {
                biggestDelta = delta;
            }
        }

        // Sweeps are only counted when a positive limit was requested.
        if (sweepLimit > 0)
        {
            sweepsDone++;
            if (sweepsDone == sweepLimit)
            {
                return;
            }
        }

        if (biggestDelta <= changeThreshold)
        {
            return;
        }
    }
}
/// <summary>
/// Adds a new block, broadcasts it when a broadcaster is set, resets the
/// pending-transaction list, and then queues a signed reward transaction
/// for the block.
/// </summary>
public void ProcessPendingTransactions()
{
    Block newBlock = addBlock();

    // The broadcaster is optional; nothing is sent when it is null.
    NetworkBroadcaster?.BroadcastBlock(newBlock);

    // Start a fresh pending list before the reward transaction is queued,
    // so the reward is the first entry in the new list.
    pendingTransactions = new LinkedList<Transaction>();

    IRewarder blockRewarder = MiningFactory.GetRewarder();
    int rewardAmount = blockRewarder.GetRewardForBlock(newBlock);

    // The mining wallet's public key is passed for both of the first two
    // constructor arguments (presumably sender and recipient - confirm
    // against the Transaction constructor).
    Transaction rewardTransaction = new Transaction(
        MiningWallet.PublicKey,
        MiningWallet.PublicKey,
        rewardAmount,
        TransactionHashFactory);
    rewardTransaction.SignTransaction(MiningWallet.Signer);

    AddNewTransaction(rewardTransaction);
}
/// <summary>
/// Builds a policy that, for every state of <paramref name="problem"/>, stores
/// the action selected by <c>FindBestAction</c> using the given value table
/// and rewarder.
/// </summary>
/// <param name="problem">Problem whose states are enumerated.</param>
/// <param name="valueTable">Value estimates used when scoring actions.</param>
/// <param name="rewarder">Reward source used when scoring actions.</param>
/// <returns>A new policy with one stored action per enumerated state.</returns>
public static GreedyPolicy<TState, TAction> Create(
    IProblem<TState, TAction> problem,
    ValueTable<TState, TAction> valueTable,
    IRewarder<TState, TAction> rewarder)
{
    var policy = new GreedyPolicy<TState, TAction>(problem);

    foreach (var state in problem.AllStates())
    {
        policy._actions[state] = FindBestAction(problem, state, valueTable, rewarder);
    }

    return policy;
}
/// <summary>
/// Computes a new value for <paramref name="state"/> by summing, over every
/// available action and every possible outcome of that action,
/// policy weight * outcome weight * (reward + current value of the outcome state).
/// </summary>
/// <param name="state">State whose value is being computed.</param>
/// <param name="policy">Supplies the weight given to each action in the state.</param>
/// <param name="rewarder">Supplies the reward for each (state, outcome, action) triple.</param>
/// <returns>The accumulated sum; 0.0 when there are no actions or outcomes.</returns>
private double CalculateValue(
    TState state,
    IPolicy<TState, TAction> policy,
    IRewarder<TState, TAction> rewarder)
{
    var total = 0.0;

    foreach (var action in _problem.AvailableActions(state))
    {
        foreach (var (successor, outcomeWeight) in _problem.PossibleStates(state, action))
        {
            var reward = rewarder.Reward(state, successor, action);

            // Combined weight of taking this action and landing in this outcome.
            var combinedWeight = policy.PAction(state, action) * outcomeWeight;

            total += combinedWeight * (reward + Value(successor));
        }
    }

    return total;
}
FindOptimalPolicy<TState, TAction>(
    IProblem<TState, TAction> problem,
    IRewarder<TState, TAction> rewarder,
    int evaluationSweepsPerPolicyUpdate = 1)
    where TState : struct
    where TAction : struct
{
    // Alternates between evaluating the current greedy policy and building a
    // new greedy policy from the resulting values. Stops when two consecutive
    // greedy policies have the same actions, or after a fixed number of rounds.
    // Returns the last adopted greedy policy together with the value table.
    const int maxIterations = 100;

    var values = new ValueTable<TState, TAction>(problem);

    // Seed the value table by evaluating a uniform random policy, then derive
    // the first greedy policy from those values.
    IPolicy<TState, TAction> startingPolicy = new UniformRandomPolicy<TState, TAction>(problem);
    values.Evaluate(startingPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
    var greedyPolicy = GreedyPolicy<TState, TAction>.Create(problem, values, rewarder);

    for (var i = 0; i < maxIterations; i++)
    {
        values.Evaluate(greedyPolicy, rewarder, evaluationSweepsPerPolicyUpdate);
        var candidatePolicy = GreedyPolicy<TState, TAction>.Create(problem, values, rewarder);

        // An unchanged policy means another round would not alter it.
        var isStable = candidatePolicy.HasSameActionsAs(greedyPolicy);
        if (isStable)
        {
            Console.WriteLine($"Found optimal policy at iteration {i}");
            break;
        }

        greedyPolicy = candidatePolicy;

        // Last permitted round finished without the policy settling.
        if (i + 1 == maxIterations)
        {
            Console.WriteLine($"Policy iteration did not converge by iteration {i}");
        }
    }

    return (greedyPolicy, values);
}
/// <summary>
/// Records one more defeated enemy and increases the score by the amount
/// reported by <paramref name="reward"/>.
/// </summary>
/// <param name="reward">Provides the score amount via <c>GetRewardScore()</c>.</param>
public void OnEnemyDeath(IRewarder reward)
{
    // The counter is bumped before the reward is queried, so it is updated
    // even if querying the reward throws.
    enemiesDefeated++;

    var scoreGain = reward.GetRewardScore();
    IncreaseScore(scoreGain);
}
/// <summary>
/// Registers <paramref name="applier"/> in <c>rewarders</c>, keyed by the
/// runtime type object of <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The reward type the rewarder is registered for.</typeparam>
/// <param name="applier">The rewarder to store under that type.</param>
public void AddRewarder<T>(IRewarder<T> applier) where T : Reward
{
    // NOTE(review): if rewarders is a Dictionary, Add throws when a rewarder
    // for this type is already registered - confirm the collection type.
    var rewardType = typeof(T);
    rewarders.Add(rewardType, applier);
}