コード例 #1
0
        /// <summary>
        /// Builds a probability distribution over the actions available in <paramref name="state"/>.
        /// Every action equal to the best action reported by <paramref name="qFunction"/> receives
        /// <c>Epsilon</c>; the remaining <c>1 - Epsilon</c> is divided evenly between the other actions.
        /// </summary>
        /// <param name="state">State whose available actions are enumerated via <c>GetActions()</c>.</param>
        /// <param name="qFunction">Q-store queried for the best action in <paramref name="state"/>.</param>
        /// <param name="actions">Receives the actions returned by <c>state.GetActions()</c>.</param>
        /// <param name="probabilities">Receives one probability per entry of <paramref name="actions"/>, in the same order.</param>
        public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
        {
            ReinforcementLearningAction bestAction;
            float bestUtility; // required by the out-parameter signature; not used here

            actions = state.GetActions();
            probabilities = new float[actions.Length];

            qFunction.GetBestActionAndUtilityForState(state, out bestAction, out bestUtility);

            // A single available action must be chosen with certainty. Without this guard it would
            // receive Epsilon (not 1), or (1 - Epsilon) / 0 if it did not compare equal to the best action.
            if (actions.Length == 1)
            {
                probabilities[0] = 1;
                return;
            }

            // Share of the remaining probability given to each non-best action; identical for every
            // iteration, so compute it once.
            float otherProbability = (1 - Epsilon) / (actions.Length - 1);

            for (int i = 0; i < actions.Length; ++i)
            {
                if (actions[i].Equals(bestAction))
                {
                    probabilities[i] = Epsilon;
                }
                else
                {
                    probabilities[i] = otherProbability;
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Computes a probability for every action available in <paramref name="state"/>: each
        /// action's Q-value is shifted by the largest Q-value, divided by <c>Temperature</c>,
        /// exponentiated, and finally normalized by the sum of all exponentiated values.
        /// </summary>
        /// <param name="state">State whose available actions are enumerated via <c>GetActions()</c>.</param>
        /// <param name="qFunction">Q-store used to evaluate each (state, action) pair.</param>
        /// <param name="actions">Receives the actions returned by <c>state.GetActions()</c>.</param>
        /// <param name="probabilities">Receives one normalized weight per entry of <paramref name="actions"/>, in the same order.</param>
        public override void ActionProbabilities(ReinforcementLearningState state, ReinforcementLearningQStore qFunction, out ReinforcementLearningAction[] actions, out float[] probabilities)
        {
            actions = state.GetActions();
            int actionCount = actions.Length;
            probabilities = new float[actionCount];

            // Pass 1: stash the raw Q-values in the output array and track the largest one.
            float largestQ = float.MinValue;
            int slot = 0;
            foreach (ReinforcementLearningAction candidate in actions)
            {
                float qValue = qFunction.Evaluate(state, candidate);
                probabilities[slot++] = qValue;
                if (qValue > largestQ)
                {
                    largestQ = qValue;
                }
            }

            // Pass 2: replace each Q-value with its exponentiated, temperature-scaled weight.
            // Shifting by the largest Q-value keeps every exponent at or below zero when
            // Temperature is positive.
            float total = 0;
            for (int k = 0; k < actionCount; ++k)
            {
                float weight = (float)Math.Exp((probabilities[k] - largestQ) / Temperature);
                probabilities[k] = weight;
                total += weight;
            }

            // Pass 3: normalize the weights by their sum.
            for (int k = 0; k < actionCount; ++k)
            {
                probabilities[k] = probabilities[k] / total;
            }
        }