예제 #1
0
        /// <summary>
        /// Persists the outcome of a transition from <paramref name="previousAgentState"/>
        /// to <paramref name="currentAgentState"/>.
        /// </summary>
        /// <remarks>
        /// If no entry exists yet for the previous state, a new one is created via
        /// <c>StorageHelpers.CreateRandomQFuncInfo</c> and stored; no reward is updated
        /// on that call. Otherwise the entry for the current state is looked up (and
        /// created the same way when missing), and the stored reward of the previous
        /// state for the action taken is replaced by the value returned from
        /// <c>QLearningAlgoritm.GetUpdateReward</c>. Changes are saved in both cases.
        /// </remarks>
        /// <param name="previousAgentState">State key the agent was in before acting.</param>
        /// <param name="currentAgentState">State key the agent is in after acting.</param>
        /// <param name="reward">Reward passed through to the update computation.</param>
        /// <param name="previousAction">
        /// Action taken in the previous state. <c>TransportType.Bus</c> updates
        /// <c>BusReward</c>; any other value updates <c>CarReward</c>.
        /// </param>
        public void SaveStateReward(
            string previousAgentState,
            string currentAgentState,
            double reward,
            TransportType previousAction)
        {
            var sourceEntry = db.QFuncInfos.FirstOrDefault(q => q.State == previousAgentState);

            if (sourceEntry == null)
            {
                // First time this previous state is seen: just register it and stop.
                db.Add(StorageHelpers.CreateRandomQFuncInfo(previousAgentState));
                db.SaveChanges();
                return;
            }

            var targetEntry = db.QFuncInfos.FirstOrDefault(q => q.State == currentAgentState);
            if (targetEntry == null)
            {
                // The current state has no entry yet; create one and track it
                // so it is saved together with the updated reward.
                targetEntry = StorageHelpers.CreateRandomQFuncInfo(currentAgentState);
                db.Add(targetEntry);
            }

            var bestNextReward = targetEntry.GetBestReward();

            if (previousAction != TransportType.Bus)
            {
                sourceEntry.CarReward = QLearningAlgoritm.GetUpdateReward(
                    sourceEntry.CarReward,
                    bestNextReward,
                    reward);
            }
            else
            {
                sourceEntry.BusReward = QLearningAlgoritm.GetUpdateReward(
                    sourceEntry.BusReward,
                    bestNextReward,
                    reward);
            }

            db.SaveChanges();
        }
예제 #2
0
        // Verifies that QLearningAlgoritm.GetUpdateReward returns the expected value
        // for the supplied previous reward, best next-state reward and current reward.
        // The test-case data is supplied by the test framework and is not visible here.
        public void Should_rigth_update_reward(double previousReward, double maxNextReward, double currentReward, double expectedValue)
        {
            // Doubles produced by arithmetic should not be compared for exact equality;
            // a tight tolerance keeps the check strict without being brittle to rounding.
            const double tolerance = 1e-9;

            var actual = QLearningAlgoritm.GetUpdateReward(previousReward, maxNextReward, currentReward);

            actual.Should().BeApproximately(expectedValue, tolerance);
        }