/// <summary>
/// Processes one observed transition for the (FromState, NextAction) pair using a
/// batched, flag-gated update (this appears to follow the Delayed Q-learning
/// scheme; confirm against the enclosing class).
/// </summary>
/// <remarks>
/// While the pair's learn flag is set, each call adds
/// Reward + Gamma * GetMaxQ(NextState) to the pair's pending update. Once
/// NumSamplesNeeded samples have been collected, the Q-value is replaced by the
/// batch average plus Epsilon1, but only if it exceeds that average by at least
/// 2 * Epsilon1. Otherwise the learn flag is cleared when the pair's previous
/// attempted update is not older than the most recent recorded Q-value change.
/// While the learn flag is clear, it is set again as soon as a Q-value change is
/// recorded after the pair's last attempted update.
/// </remarks>
/// <param name="FromState">State in which the action was taken.</param>
/// <param name="NextAction">Action taken in <paramref name="FromState"/>.</param>
/// <param name="Reward">Reward observed for the transition.</param>
/// <param name="NextState">State reached after the action.</param>
protected override void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState)
        {
            if (!this.LearnFlags[FromState][NextAction])
            {
                // Learning is switched off for this pair: switch it back on only if a
                // Q-value change was recorded after this pair's last attempted update.
                if (this.TimeOfLastAttemptedUpdate[FromState][NextAction] < this.TimeOfMostRecentQChange)
                {
                    this.LearnFlags[FromState][NextAction] = true;
                }

                return;
            }

            // Accumulate this sample into the pending batch.
            this.AttemptedUpdates[FromState][NextAction] += Reward + (this.Gamma * GetMaxQ(NextState));
            this.NumSamplesInNextUpdate[FromState][NextAction] += 1;

            // Nothing more to do until the batch is full.
            if (this.NumSamplesInNextUpdate[FromState][NextAction] < this.NumSamplesNeeded)
            {
                return;
            }

            Decimal batchAverage = this.AttemptedUpdates[FromState][NextAction] / this.NumSamplesNeeded;

            if ((this.QValues[FromState][NextAction] - batchAverage) >= (2.0M * this.Epsilon1))
            {
                // The current estimate is sufficiently above the batch average: lower it.
                this.QValues[FromState][NextAction] = batchAverage + this.Epsilon1;
                this.TimeOfMostRecentQChange = this.Time;
            }
            else if (this.TimeOfLastAttemptedUpdate[FromState][NextAction] >= this.TimeOfMostRecentQChange)
            {
                // No Q-value change has been recorded since the previous attempt for
                // this pair, so stop collecting samples for it.
                this.LearnFlags[FromState][NextAction] = false;
            }

            // Start a fresh batch and remember when this attempt happened.
            this.NumSamplesInNextUpdate[FromState][NextAction] = 0;
            this.AttemptedUpdates[FromState][NextAction] = 0.0M;
            this.TimeOfLastAttemptedUpdate[FromState][NextAction] = this.Time;
        }
Пример #2
0
        /// <summary>
        /// Scans every action in Actions and reports the largest Q-value stored for
        /// <paramref name="State"/> together with an action to take.
        /// </summary>
        /// <remarks>
        /// Ties are resolved in favour of the action encountered first, because only a
        /// strictly greater value replaces the current best. When the largest value
        /// equals InitialQ, the returned action is not the arg-max but the entry of
        /// Actions at index Time modulo Actions.Count. If no stored value is greater
        /// than Decimal.MinValue, the arg-max is null.
        /// </remarks>
        /// <param name="State">State whose Q-values are examined.</param>
        /// <param name="V">Receives the largest Q-value found.</param>
        /// <param name="NextAction">Receives the selected action.</param>
        protected void GetMax(IQState State, out Decimal V, out IQAction NextAction)
        {
            IQAction bestAction = null;
            Decimal  bestValue  = Decimal.MinValue;

            // TODO: there is a potential optimisation using a priority queue
            foreach (IQAction candidate in this.Actions)
            {
                Decimal candidateValue = this.QValues[State][candidate];

                if (candidateValue <= bestValue)
                {
                    continue;
                }

                bestValue  = candidateValue;
                bestAction = candidate;
            }

            V = bestValue;

            // A best value equal to the initial Q-value triggers the time-based
            // rotation through the action list instead of returning the arg-max.
            NextAction = (bestValue == this.InitialQ)
                ? this.Actions[this.Time % this.Actions.Count]
                : bestAction;
        }
Пример #3
0
        /// <summary>
        /// Applies one incremental Q-value update for the (FromState, NextAction) pair:
        /// Q = (1 - alpha) * Q + alpha * (Reward + Gamma * GetMaxQ(NextState)).
        /// </summary>
        /// <remarks>
        /// The pair's visit counter in NumQ is incremented first, and alpha is obtained
        /// from GetAlphaQ using the incremented count.
        /// </remarks>
        /// <param name="FromState">State in which the action was taken.</param>
        /// <param name="NextAction">Action taken in <paramref name="FromState"/>.</param>
        /// <param name="Reward">Reward observed for the transition.</param>
        /// <param name="NextState">State reached after the action.</param>
        protected override void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState)
        {
            // Target value for this sample; computed once and reused below instead of
            // being recomputed inside the update expression.
            decimal target = Reward + (this.Gamma * GetMaxQ(NextState));

            ++this.NumQ[FromState][NextAction];
            decimal alphaQ = GetAlphaQ(this.NumQ[FromState][NextAction]);

            this.QValues[FromState][NextAction] = ((1.0M - alphaQ) * this.QValues[FromState][NextAction])
                                                  + (alphaQ * target);
        }
Пример #4
0
        /// <summary>
        /// Runs one step from <paramref name="FromState"/>: advances Time, selects an
        /// action with GetMaxAction, asks Model for the resulting reward and next
        /// state, and forwards the observed transition to Transaction.
        /// </summary>
        /// <param name="FromState">State from which the step is taken.</param>
        public void PerformTransition(IQState FromState)
        {
            // Time must advance before the action is selected.
            this.Time++;

            IQState  successor;
            Decimal  observedReward;
            IQAction chosenAction = this.GetMaxAction(FromState);

            this.Model.ProcessStateAction(FromState, chosenAction, out observedReward, out successor);
            this.Transaction(FromState, chosenAction, observedReward, successor);
        }
Пример #5
0
 /// <summary>
 /// Learning hook implemented by derived classes to update their estimates from a
 /// single observed transition.
 /// </summary>
 /// <param name="FromState">State in which the action was taken.</param>
 /// <param name="NextAction">Action that was taken in <paramref name="FromState"/>.</param>
 /// <param name="Reward">Reward observed for the transition.</param>
 /// <param name="NextState">State reached after the action.</param>
 protected abstract void Transaction(IQState FromState, IQAction NextAction, Decimal Reward, IQState NextState);