/// <summary>
        /// Moves the stored reward of the named action toward <paramref name="inc"/>.
        /// The resulting value is <c>Reward * (1 - alpha) + alpha * inc</c>.
        /// </summary>
        /// <param name="actionName">Name handed to <c>FindAction</c> to locate the entry to update.</param>
        /// <param name="alpha">Weight given to <paramref name="inc"/>; the old reward keeps weight <c>1 - alpha</c>.</param>
        /// <param name="inc">Value the stored reward is blended with.</param>
        public void IncreaseReward_Q(string actionName, double alpha, double inc)
        {
            // NOTE(review): the result of FindAction is used without a null check;
            // confirm it cannot return null for an unknown action name.
            ActionReward target = FindAction(actionName);

            double retainedWeight = 1 - alpha;
            double blendedIn      = alpha * inc;

            // Two separate writes, scale first and then shift, as before.
            target.Reward *= retainedWeight;
            target.Reward += blendedIn;
        }
        /// <summary>
        /// Scans <c>ActionRewards</c> and picks the entry with the largest <c>Reward</c>.
        /// When several entries share the largest reward, the first one encountered is kept.
        /// </summary>
        /// <returns>The highest-reward entry, or <c>null</c> when <c>ActionRewards</c> yields no entries.</returns>
        public ActionReward BestAction()
        {
            ActionReward leader = null;

            foreach (var candidate in ActionRewards)
            {
                // The first entry always takes the lead; later ones must be strictly better.
                bool takesLead = leader == null || candidate.Reward > leader.Reward;
                if (!takesLead)
                {
                    continue;
                }

                leader = candidate;
            }

            return leader;
        }
        /// <summary>
        /// Performs one learning step: counts the iteration, updates the stored value of
        /// <paramref name="chosenAction"/> in the current state from the observed reward and the
        /// best value available in <paramref name="newState"/>, then moves to <paramref name="newState"/>.
        /// </summary>
        /// <param name="chosenAction">Name of the action whose value is updated in the current state.</param>
        /// <param name="newState">Key of the state that was reached; it becomes the current state.</param>
        /// <param name="reward">Reward observed for this transition.</param>
        /// <returns>
        /// <c>true</c> when the iteration counter exceeded <c>maxIterations</c>; in that case
        /// <c>EndEpisode</c> is called and no update is performed. Otherwise <c>false</c>.
        /// </returns>
        public bool Learn(string chosenAction, string newState, double reward)
        {
            current_iteration += 1;
            if (current_iteration > maxIterations)
            {
                EndEpisode();
                return true;
            }

            State        prevState           = QTable.GetState(currentState);
            ActionReward bestActionNextState = QTable.GetState(newState).BestAction();

            // BestAction() yields null for a state without any action entries. Such a state
            // offers no future reward, so its value counts as zero instead of causing a
            // NullReferenceException.
            double nextStateValue = bestActionNextState == null ? 0.0 : bestActionNextState.Reward;

            // Update target: immediate reward plus the discounted best value of the next state.
            prevState.IncreaseReward_Q(chosenAction, alpha, reward + discount * nextStateValue);

            // The step size decays as time^(-0.1); the new alpha applies from the next call on.
            time        += 1;
            alpha        = Math.Pow(time, -0.1);
            currentState = newState;
            return false;
        }