/// <summary>
/// Accumulates the probability-weighted <see cref="Value"/> obtained by taking
/// <paramref name="action"/> in <paramref name="state"/>.
/// </summary>
/// <param name="action">Action whose successor states and transition probabilities are read.</param>
/// <param name="state">State the action is taken from.</param>
/// <param name="previousFunction">Value function queried for every successor state.</param>
/// <returns>The sum, over all successors with non-zero probability, of probability times the per-successor value.</returns>
private Value GetExpectation(IMarkovAction <TState> action, TState state, IValueFunction <TState> previousFunction)
        {
            var accumulated = new Value(0.0, 0.0);

            foreach (var successor in action[state])
            {
                var weight = action[state, successor];
                if (weight != 0)
                {
                    // Add up what every entry of Rewards yields for this transition.
                    var rewardSum = 0.0;
                    foreach (var rewardSource in Rewards)
                    {
                        rewardSum += rewardSource[state, successor, action];
                    }

                    var successorValue = previousFunction[successor];

                    // First and second constructor arguments of the per-successor Value.
                    // NOTE(review): the Variance term is not scaled by Discount here - confirm this is intended.
                    var meanComponent     = rewardSum + (Discount * successorValue.Mean);
                    var varianceComponent = successorValue.Variance + (rewardSum * rewardSum) + (2 * Discount * successorValue.Mean * rewardSum);

                    accumulated += weight * new Value(meanComponent, varianceComponent);
                }
            }

            return accumulated;
        }
Beispiel #2
0
 /// <inheritdoc />
 public double this[TState from, TState to, IMarkovAction <TState> action]
 {
     // Actions that are a TAction are forwarded to the indexer call made with the
     // narrowed action; every other action yields 0.0.
     get => action is TAction typedAction ? this[from, to, typedAction] : 0.0;
 }
Beispiel #3
0
        /// <summary>
        /// Returns 1.0 when <paramref name="action"/> is the kind of action selected for
        /// <paramref name="state"/>, and 0.0 otherwise.
        /// </summary>
        /// <remarks>
        /// Nothing is selected once <c>state.FailStack</c> has reached <c>_targetFailstack</c>.
        /// Below that, a <see cref="CleanseAction"/> is selected while <c>state.Items[5]</c> is
        /// positive and an <see cref="EnhancementAction"/> otherwise.
        /// </remarks>
        /// <param name="action">Action being tested.</param>
        /// <param name="state">State the action is tested against.</param>
        public double this[IMarkovAction <EnhancementState> action, EnhancementState state]
        {
            get
            {
                if (state.FailStack >= _targetFailstack)
                {
                    return 0.0;
                }

                var slotFiveOccupied = state.Items[5] > 0;
                var actionMatches = slotFiveOccupied
                    ? action is CleanseAction
                    : action is EnhancementAction;

                return actionMatches ? 1.0 : 0.0;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Returns 1.0 when <paramref name="action"/> is an <see cref="EnhancementAction"/> whose
        /// <c>Grade</c> equals the grade derived from <paramref name="state"/>, and 0.0 otherwise.
        /// </summary>
        /// <remarks>
        /// The result is always 0.0 when <c>state.Items[0]</c> is not positive, or when
        /// <c>state.Items[_targetGrade]</c> is positive and <c>StopAtOnce</c> is set.
        /// The derived grade is one more than the index of the last positive entry among the
        /// first <c>_targetGrade</c> items (0 when none of them is positive).
        /// </remarks>
        /// <param name="action">Action being tested.</param>
        /// <param name="state">State the action is tested against.</param>
        public double this[IMarkovAction <EnhancementState> action, EnhancementState state]
        {
            get
            {
                var firstSlotEmpty = state.Items[0] <= 0;
                if (firstSlotEmpty || (state.Items[_targetGrade] > 0 && StopAtOnce))
                {
                    return 0.0;
                }

                // FindLastIndex yields -1 when nothing matches, so the grade bottoms out at 0.
                var belowTarget = state.Items.Take(_targetGrade).ToList();
                var grade       = belowTarget.FindLastIndex(count => count > 0) + 1;

                return (action is EnhancementAction enhancement && enhancement.Grade == grade) ? 1.0 : 0.0;
            }
        }
        /// <summary>
        /// Builds the per-level sets of states reached from <paramref name="state"/> when
        /// following <paramref name="policy"/> for up to <paramref name="depth"/> steps.
        /// </summary>
        /// <param name="policy">Policy that supplies the action for each visited state.</param>
        /// <param name="state">State the expansion starts from.</param>
        /// <param name="depth">
        /// Number of expansion steps; also used to pick the value function
        /// <c>ValueFunctions[policy][depth]</c> consulted when <paramref name="nextAction"/> is given.
        /// </param>
        /// <param name="nextAction">
        /// Optional action used for the first step in place of the policy's action. When supplied,
        /// the first level holds its successor states that the value function does not yet
        /// contain, and one step of <paramref name="depth"/> is consumed.
        /// </param>
        /// <returns>
        /// A list whose first entry is the starting level and whose following entries are the
        /// successor sets of each subsequent step.
        /// </returns>
        private List <HashSet <TState> > GetValueDependencies(IDeterministicPolicy <TState> policy, TState state, int depth, IMarkovAction <TState> nextAction = null)
        {
            var deps          = new List <HashSet <TState> >(depth + 1);
            var states        = new HashSet <TState>();
            var valueFunction = ValueFunctions[policy][depth];

            // replace the policy action with next action
            if (nextAction != null)
            {
                // Enumerate while holding the lock so a lazily evaluated sequence is fully
                // consumed before the action is released.
                lock (nextAction)
                {
                    states.UnionWith(nextAction[state].Where(candidate => !valueFunction.HasState(candidate)));
                }

                depth--;
            }
            else
            {
                states.Add(state);
            }

            deps.Add(states);

            while (depth-- > 0)
            {
                var nextStates = new HashSet <TState>();
                foreach (var s in states)
                {
                    var action = policy[s];
                    if (action == null)
                    {
                        // The policy has no action for this state, so it contributes no successors.
                        continue;
                    }

                    // Consume the successors inside the lock; handing the enumerable out and
                    // iterating it afterwards would read the action unguarded.
                    lock (action)
                    {
                        nextStates.UnionWith(action[s]);
                    }
                }

                deps.Add(nextStates);
                states = nextStates;
            }

            return deps;
        }
Beispiel #6
0
 /// <summary>
 /// Creates an edge whose <c>Action</c> is the string form of <paramref name="action"/>.
 /// </summary>
 /// <param name="action">Action whose <c>ToString()</c> result is stored.</param>
 public ActionEdge(IMarkovAction <EnhancementState> action)
     => Action = action.ToString();