/// <summary>
/// Performs one Q-learning step: observes the current state and reward, selects the
/// next action via <paramref name="policy"/>, and applies the temporal-difference
/// update Q(s,a) += Alpha * (r + Discount * max_a' Q(s',a') - Q(s,a)) to the
/// previously taken state/action pair.
/// </summary>
/// <param name="env">Environment providing the current state and the reward for the last transition.</param>
/// <param name="policy">Policy used to choose the next action given the current state and Q-function.</param>
/// <returns>The action selected for the current state.</returns>
/// <exception cref="InvalidOperationException">Thrown when the computed Q-update is NaN (diverging values).</exception>
public virtual ReinforcementLearningAction Step(ReinforcementLearningEnvironment env, ReinforcementLearningPolicy policy)
{
    ReinforcementLearningState state = env.State();
    float reward = env.Reward();
    ReinforcementLearningAction action = policy.ActionForState(state, qFunction);

    // Skip the update on the very first step: there is no previous transition yet.
    if ((prevState != null) && (prevAction != null))
    {
        // Only the max utility is needed for the TD target; the best action itself is discarded.
        ReinforcementLearningAction bestAction;
        float Qtp1max;
        qFunction.GetBestActionAndUtilityForState(state, out bestAction, out Qtp1max);

        float Qt = qFunction.Evaluate(prevState, prevAction);
        float deltaQ = Alpha * (reward + Discount * Qtp1max - Qt);

        // Guard against numerical divergence; a NaN here would silently poison the Q-store.
        if (float.IsNaN(deltaQ))
        {
            throw new InvalidOperationException("Q-learning update produced NaN (reward=" + reward + ", Qtp1max=" + Qtp1max + ", Qt=" + Qt + ").");
        }

        qFunction.ModifyValue(prevState, prevAction, deltaQ);
    }

    prevAction = action;
    prevState = state;
    return action;
}
/// <summary>
/// Inverted-pendulum specialization of the Q-learning step. Computes the updated
/// Q-value directly (newQ = Qt + Alpha * (r + Discount * max_a' Q(s',a') - Qt))
/// and writes it into the pendulum-specific Q-store via <c>ReplaceValue</c>
/// instead of the generic <c>ModifyValue</c> delta update.
/// </summary>
/// <param name="env">Environment providing the current state and the reward for the last transition.</param>
/// <param name="policy">Policy used to choose the next action given the current state and Q-function.</param>
/// <returns>The action selected for the current state.</returns>
/// <exception cref="InvalidOperationException">Thrown when the computed Q-value is NaN (diverging values).</exception>
/// <exception cref="InvalidCastException">Thrown when <c>qFunction</c> is not an <c>InvertedPendulumESIGMNQStore</c>.</exception>
public override ReinforcementLearningAction Step(ReinforcementLearningEnvironment env, ReinforcementLearningPolicy policy)
{
    ReinforcementLearningState state = env.State();
    float reward = env.Reward();
    ReinforcementLearningAction action = policy.ActionForState(state, qFunction);

    // Skip the update on the very first step: there is no previous transition yet.
    if ((prevState != null) && (prevAction != null))
    {
        // Only the max utility is needed for the TD target; the best action itself is discarded.
        ReinforcementLearningAction bestAction;
        float Qtp1max;
        qFunction.GetBestActionAndUtilityForState(state, out bestAction, out Qtp1max);

        float Qt = qFunction.Evaluate(prevState, prevAction);
        float newQ = Alpha * (reward + Discount * Qtp1max - Qt) + Qt;

        // Match the base implementation's divergence guard: a NaN here would
        // otherwise be written straight into the Q-store undetected.
        if (float.IsNaN(newQ))
        {
            throw new InvalidOperationException("Q-learning update produced NaN (reward=" + reward + ", Qtp1max=" + Qtp1max + ", Qt=" + Qt + ").");
        }

        // This specialization requires the pendulum-specific store; a mismatch throws InvalidCastException.
        InvertedPendulumESIGMNQStore qs = ((InvertedPendulumESIGMNQStore)qFunction);
        qs.ReplaceValue((InvertedPendulumState)prevState, (InvertedPendulumAction2)prevAction, newQ);
    }

    prevAction = action;
    prevState = state;
    return action;
}