/// <summary>
        /// Builds, layer by layer, the sets of states reachable from <paramref name="state"/>
        /// that the value functions stored for <paramref name="policy"/> do not yet contain.
        /// </summary>
        /// <remarks>
        /// The first layer is gathered from every non-null action returned by
        /// <c>policy.GetAllowedActions(state)</c> and is filtered against the highest-indexed
        /// value function. Every following layer expands the previous layer through the single
        /// action the policy holds for each state, filtered against the value function one index
        /// lower, until index 0 has been processed.
        /// When <c>UseReachableStateSpace</c> is false, a previous policy exists, and
        /// <c>policy.HasOptimal</c> reports false for a state, the policy entry for that state is
        /// assigned from <c>GetOptimalActionEx</c> first; this method can therefore write to
        /// <paramref name="policy"/>.
        /// </remarks>
        /// <param name="policy">Policy whose stored value functions and per-state actions are consulted.</param>
        /// <param name="state">State from which the first layer is expanded.</param>
        /// <returns>
        /// One set per value function stored for <paramref name="policy"/>, ordered from the
        /// highest-indexed value function down to index 0.
        /// </returns>
        private List <HashSet <TState> > GetGreedyDependencies(IDeterministicPolicy <TState> policy, TState state)
        {
            var lastIndex        = ValueFunctions[policy].Count - 1;
            var latestValues     = ValueFunctions[policy][lastIndex];
            var candidateActions = policy.GetAllowedActions(state).Where(a => a != null);

            // Null when the policy is not in the list or has no predecessor node.
            var predecessor = Policies.Find(policy)?.Previous?.Value;

            // First layer: successors of the start state under every allowed action.
            var frontier = new HashSet<TState>();
            foreach (var candidate in candidateActions)
            {
                frontier.UnionWith(candidate[state].Where(s => !latestValues.HasState(s)));
            }

            var layers = new List<HashSet<TState>> { frontier };

            // Remaining layers: walk the value functions from the second-highest index down to 0.
            for (var level = lastIndex - 1; level >= 0; level--)
            {
                var levelValues = ValueFunctions[policy][level];
                var expansion   = new HashSet<TState>();

                foreach (var current in frontier)
                {
                    // Fill in a missing policy entry from the previous policy before reading it.
                    if (!UseReachableStateSpace && !policy.HasOptimal(current) && predecessor != null)
                    {
                        policy[current] = GetOptimalActionEx(predecessor, current);
                    }

                    var chosen = policy[current];
                    if (chosen == null)
                    {
                        // No action recorded for this state, so it contributes no successors.
                        continue;
                    }

                    expansion.UnionWith(chosen[current].Where(s => !levelValues.HasState(s)));
                }

                layers.Add(expansion);
                frontier = expansion;
            }

            return layers;
        }