Esempio n. 1
0
    /// <summary>
    /// Handles the outcome of a single turn: encodes the new state, adds the
    /// turn's reward to the running total, trains the agent (unless running in
    /// test mode), reports the episode when it has ended, and finally makes the
    /// new state the "previous" state for the next call.
    /// </summary>
    /// <param name="p_validTurn">Passed through to <c>GetReward</c> and <c>IsFinished</c>.</param>
    /// <param name="p_grid">Grid data handed to the state encoder.</param>
    /// <param name="p_goals">Goal data passed through to <c>GetReward</c> and <c>IsFinished</c>.</param>
    /// <param name="p_powerupScore">Power-up score handed to the state encoder.</param>
    virtual protected void Train(bool p_validTurn, List <GridDto> p_grid, List <GameGoalDto> p_goals, int p_powerupScore)
    {
        // Drop the old "next state" vector before replacing it with a fresh encoding.
        Vector.Release(_state1);
        _state1 = _encoder.EncodeState(p_grid, p_powerupScore, _agent.GetParam(BaseAgent.STATE_DIM), boost);

        float stepReward = GetReward(p_validTurn, p_goals);
        _accReward += stepReward;

        // Any non-zero value from IsFinished ends the episode.
        // NOTE(review): the sign presumably distinguishes win (> 0) from loss (< 0) - confirm.
        int outcome = IsFinished(p_validTurn, p_goals);
        bool episodeOver = outcome != 0;

        // The agent only learns outside of test mode.
        if (!_test)
        {
            _agent.Train(_state0, _state1, stepReward, episodeOver);
        }

        if (episodeOver)
        {
            // NOTE: _wins / _loses are not updated here; the increments based on
            // the sign of the IsFinished result were disabled in this method.

            if (_test)
            {
                // Count how many test episodes ended with this number of valid moves.
                if (!_histogram.ContainsKey(_validMoves))
                {
                    _histogram[_validMoves] = 1;
                }
                else
                {
                    _histogram[_validMoves] += 1;
                }
            }

            string consoleLine = "Epoch " + (_wins + _loses) + ", seed " + _seed
                + " >> moves (" + _validMoves + " / " + _moves
                + ") , win rate (" + _wins + " / " + _loses
                + ") : reward " + _accReward;
            Console.WriteLine(consoleLine);

            string logLine = _wins + ";" + _loses + ";" + _seed + ";" + _validMoves + ";" + _moves + ";" + _accReward;
            _logger.Log(logLine);

            // Clearing this flag marks the current episode as no longer running.
            started = false;
        }

        // NOTE: a debug guard used to live here; it called DEBUG_LogError() and
        // cleared 'started' when _moves exceeded _DEBUG_limit while
        // _agent.Epsilon was above 0.5. It is currently disabled.

        // The state just encoded becomes the previous state for the next turn.
        Vector.Release(_state0);
        _state0 = Vector.Copy(_state1);
    }