コード例 #1
0
ファイル: SARSA.cs プロジェクト: Akneman93/Solvers
        /// <summary>
        /// Returns the entry with the highest Q value among those stored in QValues
        /// for <paramref name="s"/>.
        /// </summary>
        /// <remarks>
        /// If <paramref name="s"/> has no entry in QValues yet, it is registered via
        /// AddNewState and the lookup is repeated. When several entries share the
        /// highest Q, the one enumerated first is returned.
        /// </remarks>
        /// <param name="s">State whose stored entries are searched.</param>
        /// <returns>The stored entry with the greatest Q.</returns>
        /// <exception cref="NoOperatorFoundException">
        /// The sequence stored for <paramref name="s"/> is empty.
        /// </exception>
        private qTurple GetBestTurple(IState s)
        {
            IEnumerable<qTurple> list = null;

            if (!QValues.TryGetValue(s, out list))
            {
                // Unknown state: register it, then search its freshly stored entries.
                AddNewState(s);
                return GetBestTurple(s);
            }

            // The stored value is only known to be an IEnumerable, so walk it exactly
            // once instead of enumerating it separately for the emptiness check, the
            // first element and the maximum search.
            using (IEnumerator<qTurple> entries = list.GetEnumerator())
            {
                if (!entries.MoveNext())
                {
                    throw new NoOperatorFoundException();
                }

                qTurple bestTurple = entries.Current;

                while (entries.MoveNext())
                {
                    qTurple candidate = entries.Current;

                    // Strict comparison keeps the earliest entry on ties.
                    if (candidate.Q > bestTurple.Q)
                    {
                        bestTurple = candidate;
                    }
                }

                return bestTurple;
            }
        }
コード例 #2
0
ファイル: SARSA.cs プロジェクト: Akneman93/Solvers
        /// <summary>
        /// Registers <paramref name="state"/> in QValues with one entry per operator
        /// reported by Env.ApplicableOperators for that state.
        /// </summary>
        /// <remarks>
        /// Each entry starts with the value returned by DefaultQ for the state/operator pair.
        /// NOTE(review): this always calls QValues.Add, so it presumably expects the state
        /// not to be registered yet; confirm against the QValues type.
        /// </remarks>
        /// <param name="state">State to register.</param>
        private void AddNewState(IState state)
        {
            List<qTurple> entries = new List<qTurple>();

            foreach (IOperator applicableOp in Env.ApplicableOperators(state))
            {
                // Q is assigned before Op, matching the initializer order below.
                entries.Add(new qTurple
                {
                    Q  = DefaultQ(state, applicableOp),
                    Op = applicableOp
                });
            }

            QValues.Add(state, entries);
        }
コード例 #3
0
ファイル: SARSA.cs プロジェクト: Akneman93/Solvers
        /// <summary>
        /// Runs one training episode starting from Start.State.
        /// </summary>
        /// <remarks>
        /// Each step asks trainingPolicy for an operator, applies it through Env.act,
        /// adds the outcome's reward to EpisodeReward and updates the Q value of the
        /// state/operator entry that was just used. The episode ends when the current
        /// state equals Goal.State, when the stopwatch reaches timeAvailable
        /// milliseconds, or when stopped is set. Afterwards the episode reward and the
        /// elapsed time are appended to EpisodeRewards and TimesList, and goalFound is
        /// set if the episode finished in the goal state.
        /// </remarks>
        private void Train()
        {
            // Extra value added to an entry whose step reaches the goal while
            // goalFound is still false.
            const int firstGoalBonus = 100;

            episodeCount++;
            EpisodeReward = 0;

            IState state = Start.State;

            while (!state.Equals(Goal.State)
                   && stopwatch.Elapsed.TotalMilliseconds < timeAvailable
                   && !stopped)
            {
                IOperator chosenOp = trainingPolicy.action(state);
                IOutcome result = Env.act(state, chosenOp);

                EpisodeReward += result.Reward;

                qTurple entry = getTurple(state, chosenOp);

                if (!goalFound && result.State.Equals(Goal.State))
                {
                    entry.Q += firstGoalBonus;
                }

                // Move Q towards: reward + Gamma * Q of the operator that GetBestOp
                // picks for the resulting state; Alpha scales the step.
                entry.Q += Alpha * (result.Reward + Gamma * getTurple(result.State, GetBestOp(result.State)).Q - entry.Q);

                state = result.State;
            }

            EpisodeRewards.Add(EpisodeReward);
            TimesList.Add(stopwatch.Elapsed.TotalMilliseconds);

            if (state.Equals(Goal.State))
            {
                goalFound = true;
            }
        }