/// <summary>
/// Draws every entry of <c>_backgroundImageList</c> onto <paramref name="g"/>,
/// placing each image at its stored offset shifted by
/// (<paramref name="x"/>, <paramref name="y"/>).
/// </summary>
/// <param name="g">Surface to draw on.</param>
/// <param name="state">Not read by this method.</param>
/// <param name="x">Horizontal origin added to every image offset.</param>
/// <param name="y">Vertical origin added to every image offset.</param>
private static void renderBackground(Graphics g, GameState state, int x, int y)
{
    foreach (var layer in _backgroundImageList)
    {
        // Item1 is the image, Item2 its offset relative to the origin.
        var image = layer.Item1;
        var offset = layer.Item2;

        g.DrawImage(image, x + offset.X, y + offset.Y);
    }
}
        /// <summary>
        /// Draws the HP bars for the user and for the tower.
        /// Each bar is a semi-transparent dark-red fill whose width is proportional to the
        /// current HP, framed by a light-green outline 50 pixels wide and 10 pixels high.
        /// </summary>
        /// <param name="g">Surface to draw on.</param>
        /// <param name="state">Supplies <c>UserPos</c>, <c>UserHp</c> and <c>TowerHp</c>.</param>
        /// <param name="x">Horizontal origin of the scene.</param>
        /// <param name="y">Vertical origin of the scene.</param>
        private static void renderHp(Graphics g, GameState state, int x, int y)
        {
            // SolidBrush owns a native GDI+ resource; dispose it deterministically
            // instead of leaving one behind on every call.
            using (var brush = new SolidBrush(Color.FromArgb(180, Color.DarkRed)))
            {
                // user HP: anchored to the hero sprite position, 10 px of fill per HP point
                var heroPos = _heroImageList[state.UserPos].Item2;
                g.FillRectangle(brush, heroPos.X + x + 25, heroPos.Y + y + 40, state.UserHp * 10, 10);
                g.DrawRectangle(Pens.LightGreen, heroPos.X + x + 25, heroPos.Y + y + 40, 50, 10);

                // tower HP: fixed position in the scene, 5 px of fill per HP point
                g.FillRectangle(brush, x + 66 * 4 + 40, y + 33 * 1 - 15, state.TowerHp * 5, 10);
                g.DrawRectangle(Pens.LightGreen, x + 66 * 4 + 40, y + 33 * 1 - 15, 50, 10);
            }
        }
 /// <summary>
 /// Chooses an action for <paramref name="state"/> epsilon-greedily: with probability
 /// <c>epsilon</c> a uniformly random available action is returned, otherwise the result
 /// of <c>computeActionFromQValues</c>.
 /// </summary>
 /// <param name="state">State whose action set is sampled.</param>
 /// <returns>
 /// The selected action. When the state offers no actions the greedy path is always taken,
 /// so the result is whatever <c>computeActionFromQValues</c> yields for an empty set.
 /// </returns>
 public Action getAction(GameState state)
 {
     // Materialise once: the action set was previously enumerated twice
     // (Count() and then ToList()) on the exploration path.
     var availActions = state.GetActionSet().ToList();
     var flipCoin = _rand.NextDouble();

     // Explore only when there is something to pick from; indexing an empty
     // list would throw ArgumentOutOfRangeException.
     if (flipCoin < epsilon && availActions.Count > 0)
     {
         var selected = _rand.Next(availActions.Count);
         return availActions[selected];
     }

     return computeActionFromQValues(state);
 }
        /// <summary>
        /// Computes the reward for taking action <paramref name="a"/> in state <paramref name="s"/>.
        /// The successor state is obtained from <c>GetNextState</c>.
        /// </summary>
        /// <param name="s">State before the action.</param>
        /// <param name="a">Action being evaluated.</param>
        /// <returns>
        /// A base value (-100001 if the successor has <c>UserHp == 0</c>, 100000 if it has
        /// <c>TowerHp == 0</c>, otherwise -1) plus twice the tower HP removed plus the change
        /// in user HP.
        /// </returns>
        public static double GetReward(GameState s, Action a)
        {
            const double stepCost = -1.0;

            var successor = GetNextState(s, a);

            // NOTE(review): the defeat case subtracts from the step cost while the victory
            // case replaces it - confirm the asymmetry is intended.
            double baseReward;
            if (successor.UserHp == 0)
                baseReward = stepCost - 100000;
            else if (successor.TowerHp == 0)
                baseReward = 100000;
            else
                baseReward = stepCost;

            var towerDamageBonus = (s.TowerHp - successor.TowerHp) * 2;
            var userHpChange = successor.UserHp - s.UserHp;

            return baseReward + towerDamageBonus + userHpChange;
        }
        /// <summary>
        /// Returns the available action with the highest stored Q-value for
        /// <paramref name="state"/>. On ties the first action encountered wins.
        /// </summary>
        /// <param name="state">State whose action set is scanned.</param>
        /// <returns>
        /// The best action, or <c>Action.NONE</c> when no action has a Q-value greater than
        /// <c>Double.MinValue</c> (including when the action set is empty).
        /// </returns>
        public Action computeActionFromQValues(GameState state)
        {
            var bestAction = Action.NONE;
            var bestValue = Double.MinValue;

            foreach (var candidate in state.GetActionSet())
            {
                var q = _stateQValueMap[Tuple.Create(state, candidate)];

                // Only a strictly greater value replaces the current best.
                if (!(q > bestValue))
                {
                    continue;
                }

                bestValue = q;
                bestAction = candidate;
            }

            return bestAction;
        }
        /// <summary>
        /// Produces the state that follows <paramref name="s"/> when action
        /// <paramref name="a"/> is taken. <paramref name="s"/> itself is not modified;
        /// the work is done on a clone.
        /// </summary>
        /// <param name="s">Current state.</param>
        /// <param name="a">LEFT decreases the position by one, RIGHT increases it by one; any other action leaves it unchanged.</param>
        /// <returns>
        /// The successor state. It is a <c>CompactGameState</c> when <paramref name="s"/> is one.
        /// </returns>
        public static GameState GetNextState(GameState s, Action a)
        {
            var successor = s.Clone();

            // 1) Movement.
            if (a == core.Action.LEFT)
            {
                successor = new GameState(successor.UserHp, successor.UserPos - 1, successor.TowerHp);
            }
            else if (a == core.Action.RIGHT)
            {
                successor = new GameState(successor.UserHp, successor.UserPos + 1, successor.TowerHp);
            }

            // 2) Effect of the position reached.
            switch (successor.UserPos)
            {
                case 0:
                    // Position 0 sets the user's HP to 5.
                    successor = new GameState(5, successor.UserPos, successor.TowerHp);
                    break;

                case 2:
                    // Position 2 costs both the user and the tower one HP.
                    successor = new GameState(successor.UserHp - 1, successor.UserPos, successor.TowerHp - 1);
                    break;
            }

            // 3) Keep the representation of the input state.
            if (!(s is CompactGameState))
            {
                return successor;
            }

            return new CompactGameState(successor.UserHp, successor.UserPos, successor.TowerHp);
        }
 /// <summary>
 /// Returns the value of <paramref name="state"/> as computed by
 /// <c>computeValueFromQValues</c>.
 /// </summary>
 /// <param name="state">State to evaluate.</param>
 /// <returns>The value reported by <c>computeValueFromQValues</c>.</returns>
 public double getValue(GameState state)
 {
     var stateValue = computeValueFromQValues(state);
     return stateValue;
 }
 /// <summary>
 /// Draws the hero image that corresponds to <c>state.UserPos</c>, at its stored offset
 /// shifted by (<paramref name="x"/>, <paramref name="y"/>).
 /// </summary>
 /// <param name="g">Surface to draw on.</param>
 /// <param name="state">Supplies <c>UserPos</c>, used as index into <c>_heroImageList</c>.</param>
 /// <param name="x">Horizontal origin of the scene.</param>
 /// <param name="y">Vertical origin of the scene.</param>
 private static void renderHero(Graphics g, GameState state, int x, int y)
 {
     // Look the entry up once: Item1 is the image, Item2 its offset.
     var sprite = _heroImageList[state.UserPos];
     var anchor = sprite.Item2;

     g.DrawImage(sprite.Item1, x + anchor.X, y + anchor.Y);
 }
 /// <summary>
 /// Looks up the stored Q-value for the pair (<paramref name="state"/>, <paramref name="action"/>).
 /// </summary>
 /// <param name="state">State part of the key.</param>
 /// <param name="action">Action part of the key.</param>
 /// <returns>The value stored in <c>_stateQValueMap</c> for that pair.</returns>
 /// <remarks>
 /// NOTE(review): presumably throws if the pair was never registered (indexer lookup
 /// without a fallback) - confirm against the map's type.
 /// </remarks>
 public double getQValue(GameState state, Action action)
 {
     var key = Tuple.Create(state, action);
     return _stateQValueMap[key];
 }
 /// <summary>
 /// Draws the whole scene for <paramref name="state"/> onto <paramref name="g"/> by calling
 /// <c>renderBackground</c>, <c>renderHero</c> and <c>renderHp</c>, in that order.
 /// </summary>
 /// <param name="g">Surface to draw on.</param>
 /// <param name="state">Game state to visualise.</param>
 /// <param name="x">Horizontal origin passed to every drawing step.</param>
 /// <param name="y">Vertical origin passed to every drawing step.</param>
 public static void render(Graphics g, GameState state, int x, int y)
 {
     // Later calls paint over earlier ones, so the order below is the layering order.
     renderBackground(g, state, x, y);
     renderHero(g, state, x, y);
     renderHp(g, state, x, y);
 }
        /// <summary>
        /// Replaces the agent with a fresh <c>QLearningAgent</c>, resets the three track bars
        /// and the agent's alpha/gamma/epsilon, then registers a Q-value of 0 for every
        /// (state, action) pair in the ranges 0.._maxUserHp, 0.._maxUserPos, 0.._maxTowerHp.
        /// When <c>checkBoxCompact</c> is checked the keys are <c>CompactGameState</c>
        /// instances, otherwise plain <c>GameState</c> instances.
        /// </summary>
        private void resetModel()
        {
            // The agent is created before the track bars are touched and its parameters are
            // assigned afterwards; keep this order.
            _agent = new QLearningAgent();

            trackBarAlpha.Value = 2;
            trackBarGamma.Value = 8;
            trackBarEpsilon.Value = 1;

            _agent.alpha = 0.2;
            _agent.gamma = 0.8;
            _agent.epsilon = 0.05;

            // register state-qvalue
            var useCompactKeys = checkBoxCompact.Checked;

            foreach (var hp in Enumerable.Range(0, _maxUserHp + 1))
            {
                foreach (var pos in Enumerable.Range(0, _maxUserPos + 1))
                {
                    foreach (var tower in Enumerable.Range(0, _maxTowerHp + 1))
                    {
                        // The action set always comes from the plain state; only the
                        // dictionary key switches to the compact representation.
                        var plainState = new GameState(hp, pos, tower);
                        GameState key = useCompactKeys
                            ? new CompactGameState(hp, pos, tower)
                            : plainState;

                        foreach (var action in plainState.GetActionSet())
                        {
                            _agent.registerStateQValue(key, action, 0);
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Click handler: runs the number of episodes entered in <c>textBoxEpisode</c> on a
        /// background task and afterwards writes the agent's Q-values to <c>QValue.csv</c>.
        /// </summary>
        /// <remarks>
        /// The file has one line per tower HP (0..10); each line lists, for every user position
        /// (0..2) and user HP (0..5), the Q-values for LEFT, STOP and RIGHT, each followed by a
        /// comma. Pairs missing from the agent's map are written as 0.
        /// If the text box does not contain a non-negative integer the handler does nothing.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void buttonRunEpisode_Click(object sender, EventArgs e)
        {
            // Read and validate the input here, on the calling (UI) thread: touching the
            // control from the worker is a cross-thread access, and a parse failure inside
            // the task would only surface as an unobserved task exception.
            int episodeCount;
            if (!int.TryParse(textBoxEpisode.Text, out episodeCount) || episodeCount < 0)
            {
                return;
            }

            new Task(() =>
            {
                for (var episode = 0; episode < episodeCount; episode++)
                {
                    while (true)
                    {
                        Thread.Sleep(10);
                        previewNextSep();
                        Thread.Sleep(10);
                        selectNextStep();
                        Thread.Sleep(10);
                        var keepGoing = updateNextStep();

                        if (!keepGoing)
                        {
                            Thread.Sleep(10);
                            break;
                        }
                    }
                }

                // Column order within each (position, HP) cell.
                var exportedActions = new[] { core.Action.LEFT, core.Action.STOP, core.Action.RIGHT };

                // 'using' closes the file even if a write throws.
                using (var writer = new StreamWriter("QValue.csv"))
                {
                    foreach (var towerHp in Enumerable.Range(0, 11))
                    {
                        foreach (var userPos in Enumerable.Range(0, 3))
                        {
                            foreach (var userHp in Enumerable.Range(0, 6))
                            {
                                var state = new GameState(userHp, userPos, towerHp);

                                foreach (var action in exportedActions)
                                {
                                    // Single lookup; unknown pairs are exported as 0.
                                    double qValue;
                                    if (!_agent._stateQValueMap.TryGetValue(Tuple.Create(state, action), out qValue))
                                    {
                                        qValue = 0.0;
                                    }

                                    // Invariant culture keeps '.' as the decimal separator so a
                                    // value can never be split by the ',' column delimiter.
                                    writer.Write(qValue.ToString(System.Globalization.CultureInfo.InvariantCulture));
                                    writer.Write(",");
                                }
                            }
                        }

                        writer.WriteLine();
                    }
                }
            }).Start();
        }
        /// <summary>
        /// Applies every action whose slot in <c>_actionStateArray</c> is non-null (the slot
        /// index is cast to <c>core.Action</c>): the agent is updated with the transition,
        /// <c>_currentState</c> advances and the reward is added to <c>_totalReward</c>.
        /// Afterwards the three slots are cleared and the total reward text box is refreshed
        /// through <c>Invoke</c>.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the current state has <c>UserHp == 0</c> or <c>TowerHp == 0</c>
        /// (in which case <c>updateEpisode</c> and <c>resetEpisode</c> are called first);
        /// otherwise <c>true</c>.
        /// </returns>
        private bool updateNextStep()
        {
            for (var actionIndex = 0; actionIndex < _actionStateArray.Length; actionIndex++)
            {
                var slot = _actionStateArray[actionIndex];
                if (slot != null)
                {
                    var chosen = (core.Action)actionIndex;
                    var previous = _currentState;

                    var successor = core.Environment.GetNextState(previous, chosen);
                    var stepReward = core.Environment.GetReward(previous, chosen);
                    _agent.update(previous, chosen, successor, stepReward);

                    _totalReward += stepReward;
                    _currentState = successor;
                }
            }

            // Clear the three selection slots for the next step.
            for (var slotIndex = 0; slotIndex < 3; slotIndex++)
            {
                _actionStateArray[slotIndex] = null;
            }

            Invoke(new System.Action(() => textBoxTotalReward.Text = _totalReward.ToString()));

            var episodeFinished = _currentState.UserHp == 0 || _currentState.TowerHp == 0;
            if (!episodeFinished)
            {
                return true;
            }

            updateEpisode();
            resetEpisode();
            return false;
        }
        /// <summary>
        /// Starts a new episode: zeroes the step counter and total reward, puts the user at
        /// position 1 with <c>_maxUserHp</c> and the tower at <c>_maxTowerHp</c>, and allocates
        /// a fresh three-slot action array. The start state is a <c>CompactGameState</c> when
        /// <c>checkBoxCompact</c> is checked, otherwise a plain <c>GameState</c>.
        /// </summary>
        private void resetEpisode()
        {
            _stepCount = 0;
            _totalReward = 0;

            _currentState = checkBoxCompact.Checked
                ? new CompactGameState(_maxUserHp, 1, _maxTowerHp)
                : new GameState(_maxUserHp, 1, _maxTowerHp);

            _actionStateArray = new GameState[3];
        }
 /// <summary>
 /// Stores <paramref name="qValue"/> in <c>_stateQValueMap</c> under the key
 /// (<paramref name="state"/>, <paramref name="action"/>), replacing any existing entry.
 /// </summary>
 /// <param name="state">State part of the key.</param>
 /// <param name="action">Action part of the key.</param>
 /// <param name="qValue">Value to store.</param>
 public void registerStateQValue(GameState state, Action action, double qValue)
 {
     var key = Tuple.Create(state, action);
     _stateQValueMap[key] = qValue;
 }
 /// <summary>
 /// Returns the action selected by <c>computeActionFromQValues</c> for
 /// <paramref name="state"/>, without any random exploration.
 /// </summary>
 /// <param name="state">State to choose an action for.</param>
 /// <returns>The action reported by <c>computeActionFromQValues</c>.</returns>
 public Action getPolicy(GameState state)
 {
     var greedyAction = computeActionFromQValues(state);
     return greedyAction;
 }
        /// <summary>
        /// Updates the stored Q-value of (<paramref name="state"/>, <paramref name="action"/>)
        /// after observing a transition. The new value is
        /// <c>(1 - alpha) * Q(state, action) + alpha * (reward + gamma * V(nextState))</c>,
        /// where <c>V</c> is <c>computeValueFromQValues</c>.
        /// </summary>
        /// <param name="state">State in which the action was taken.</param>
        /// <param name="action">Action that was taken.</param>
        /// <param name="nextState">State reached afterwards.</param>
        /// <param name="reward">Reward received for the transition.</param>
        public void update(GameState state, Action action, GameState nextState, double reward)
        {
            var currentEstimate = getQValue(state, action);
            var target = reward + gamma * computeValueFromQValues(nextState);

            // Blend the old estimate with the new target, weighted by alpha.
            var blended = (1 - alpha) * currentEstimate + alpha * target;

            _stateQValueMap[Tuple.Create(state, action)] = blended;
        }