Ejemplo n.º 1
0
        /// <summary>
        /// Reinforces the Q table with a single observation: the state that was reached, the action
        /// that was taken and the reward that was received.
        /// </summary>
        /// <remarks>
        /// The originating state is not supplied by the caller; it is taken to be the last key
        /// enumerated from the Q table. The entry for that state and the derived action is set to
        /// <c>(1 - LearningRate) * Q[s, a] + LearningRate * (r + Lambda * Q[s', a'])</c>, where
        /// <c>s'</c> is the reached state and <c>a'</c> is whatever <c>Q.GetMaxAction</c> returns for it.
        /// </remarks>
        /// <param name="x">Feature vector that is converted into the reached state.</param>
        /// <param name="y">Action label.</param>
        /// <param name="r">Reward value.</param>
        public override void Learn(Vector x, double y, double r)
        {
            // Previous state: the last key the Q table enumerates.
            var previousState = this.Q.Keys.Last();
            var reachedState = MDPConverter.GetState(x, this.FeatureProperties, this.FeatureDiscretizer);
            var transition = MDPConverter.GetAction(y, previousState.Id, reachedState.Id);

            // Record the observed reward against the reached state before blending.
            this.Q.AddOrUpdate(reachedState, transition, r);

            var retained = (1.0 - this.LearningRate) * this.Q[previousState, transition];
            var target = r + (this.Lambda * this.Q[reachedState, this.Q.GetMaxAction(reachedState)]);

            this.Q[previousState, transition] = retained + (this.LearningRate * target);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reinforces the Q table with one fully specified transition: origin state, action taken,
        /// state reached and reward received.
        /// </summary>
        /// <remarks>
        /// An origin state that is not yet in the Q table is first seeded with the action and reward;
        /// a reached state that is not yet in the table is registered as a key. The entry for the
        /// origin state and action is then set to
        /// <c>(1 - LearningRate) * Q[s, a] + LearningRate * (r + Lambda * Q[s', a'])</c>, where
        /// <c>a'</c> is whatever <c>Q.GetMaxAction</c> returns for the reached state <c>s'</c>.
        /// </remarks>
        /// <param name="x1">Item features, i.e. the origin state.</param>
        /// <param name="y">Action label.</param>
        /// <param name="x2">Features of the state reached by the transition.</param>
        /// <param name="r">Reward value.</param>
        public override void Learn(Vector x1, double y, Vector x2, double r)
        {
            var originState = MDPConverter.GetState(x1, this.FeatureProperties, this.FeatureDiscretizer);
            var reachedState = MDPConverter.GetState(x2, this.FeatureProperties, this.FeatureDiscretizer);
            var transition = MDPConverter.GetAction(y, originState.Id, reachedState.Id);

            // Make sure both ends of the transition exist in the table before it is indexed.
            var originKnown = this.Q.ContainsKey(originState);
            if (!originKnown)
            {
                this.Q.AddOrUpdate(originState, transition, r);
            }

            var reachedKnown = this.Q.ContainsKey(reachedState);
            if (!reachedKnown)
            {
                this.Q.AddKey(reachedState);
            }

            var retained = (1.0 - this.LearningRate) * this.Q[originState, transition];
            var target = r + (this.Lambda * this.Q[reachedState, this.Q.GetMaxAction(reachedState)]);

            this.Q[originState, transition] = retained + (this.LearningRate * target);
        }