Esempio n. 1
0
        /// <summary>
        /// Advances the season task by one step. When <c>_step % 2</c> is 0 (reward / food step)
        /// the first element of <paramref name="action"/> is evaluated and an observation is built
        /// from the resulting score; when it is 1 (eat step) an all-zero observation is returned.
        /// </summary>
        /// <param name="action">Controller output; only element 0 is read, and only on reward / food steps.</param>
        /// <returns>The observation produced for this step.</returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("SEASON TASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);

            double   stepScore = 0;
            double[] output    = new double[OutputCount]; // stays all zeros unless this is a reward / food step

            int phase = _step % 2; // 0 = reward / food step, 1 = eat step

            if (phase == 0)
            {
                Debug.DLog("Task: Reward / Food Step", true);

                // We compare against the previous food, therefore the "- 2".
                int    foodIndex = (_step - 2) / 2;
                double eaten     = action[0];

                stepScore = Evaluate(eaten, foodIndex);
                output    = GetOutput(_step, stepScore);

                // The observation above is built from the raw score; only the accumulated score is suppressed.
                bool countsTowardsScore = ScoreThisStep(foodIndex);
                if (!countsTowardsScore)
                {
                    stepScore = 0;
                }
                _score += stepScore;

                Debug.DLog($"{"Eating:",-16} {eaten}" +
                           $"\n{"Last Was Poisonous:",-16} {Sequence[foodIndex].IsPoisonous}" +
                           $"\n{"Score:",-16} {stepScore.ToString("F4")}" +
                           $"\n{"Total Score:",-16} {_score.ToString("F4")} / {(_step / 2)}" +
                           $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
            }
            else if (phase == 1)
            {
                // Eat step: send only 0s.
                Debug.DLog("Task: Eat Step", true);
            }

            Debug.DLogHeader("SEASON TASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, output, stepScore);
            }

            _step++;
            return output;
        }
Esempio n. 2
0
        /// <summary>
        /// Scores the controller's guess against the target centre of the previous step, then
        /// returns the flattened image for the current step (or an all-zero array of
        /// <c>XDim * YDim</c> elements once the images are exhausted).
        /// </summary>
        /// <param name="action">Controller output, converted to coordinates via <c>GetXYCoordinates</c>.</param>
        /// <returns>The next observation.</returns>
        public override double[] PerformAction(double[] action)
        {
            var guessedPosition = GetXYCoordinates(action);
            var stepScore       = Evaluate(guessedPosition, _targetCenters[_step - 1]);
            _score += stepScore;

            double[] nextObservation;
            if (_step < _images.Length)
            {
                nextObservation = Flatten2DArray(_images[_step]);
            }
            else
            {
                // No images left: emit a blank frame.
                nextObservation = new double[XDim * YDim];
            }

            _step++;

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, nextObservation, stepScore);
            }

            return nextObservation;
        }
        /// <summary>
        /// Scores <paramref name="action"/> against the previous sequence element and returns the
        /// current element (or an all-zero array of <c>OutputCount</c> elements once the sequence
        /// is exhausted).
        /// </summary>
        /// <param name="action">Controller output for this step.</param>
        /// <returns>The next observation.</returns>
        public override double[] PerformAction(double[] action)
        {
            var stepScore = Evaluate(action, _sequence[_step - 1]);
            _score += stepScore;

            double[] nextObservation;
            if (_step < _sequence.Length)
            {
                nextObservation = _sequence[_step];
            }
            else
            {
                // Past the end of the sequence: emit zeros.
                nextObservation = new double[OutputCount];
            }

            _step++;

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, nextObservation, stepScore);
            }

            return nextObservation;
        }
        /// <summary>
        /// Advances the copy task by one step. The observation for the current step is always
        /// produced; once the step counter reaches <c>Sequence.Length + 2 + 1</c> the controller's
        /// action is additionally scored against the corresponding sequence element.
        /// </summary>
        /// <param name="action">Controller output; treated as the "read" value during the scoring phase.</param>
        /// <returns>The observation for the current step.</returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("COPYTASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);

            double[] observation = GetObservation(_step);
            double   stepScore   = 0;

            // The controller's "action" is the reading after |seq| + 2 + 1 steps.
            // The +2 are the start and delimiter bits, and the +1 is the idle step after the
            // delimiter, to allow for content jumping.
            int firstReadStep = Sequence.Length + 2 + 1;

            if (_step >= firstReadStep)
            {
                int      sequenceIndex = _step - firstReadStep; // actual sequence index to compare to
                double[] expected      = Sequence[sequenceIndex];

                stepScore = Evaluate(expected, action);
                _score   += stepScore;

                bool recordNovelty = NoveltySearch.ScoreNovelty &&
                                     NoveltySearch.VectorMode == NoveltyVectorMode.EnvironmentAction;
                if (recordNovelty)
                {
                    // Register the read as novelty behaviour.
                    NoveltySearch.NoveltyVectors[sequenceIndex] = action;
                }

                Debug.DLog($"{"Reading:",-16} {Utilities.ToString(action, "F2")}" +
                           $"\n{"Actual:",-16} {Utilities.ToString(expected, "F2")}" +
                           $"\n{"Score:",-16} {stepScore.ToString("F4")}" +
                           $"\n{"Total Score:",-16} {_score.ToString("F4")} / {_step - Sequence.Length - 1}" +
                           $"\n{"Max Score:",-16} {Sequence.Length.ToString("F0")}", true);
            }

            Debug.DLogHeader("COPYTASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, stepScore);
            }

            _step++;

            return observation;
        }
Esempio n. 5
0
        /// <summary>
        /// Advances the season task by one step. Every <c>StepNum</c>-th step (the one where
        /// <c>_step % StepNum == StepNum - 1</c>) is a reward step on which <c>action[0]</c> is
        /// evaluated; all other steps are normal steps that only produce an observation.
        /// </summary>
        /// <param name="action">Controller output; only element 0 is read, and only on reward steps.</param>
        /// <returns>The observation for this step.</returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("SEASON TASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);
            double thisEvaluation = 0;

            double[] observation;

            int task   = _step % StepNum;
            int season = _step / StepNum;

            if (task == StepNum - 1)
            {
                Debug.DLog($"Task: Reward Step", true);
                double eatVal = action[0];
                thisEvaluation = Evaluate(eatVal, season);
                observation    = GetOutput(_step, thisEvaluation);

                // The observation is built from the raw evaluation; only the accumulated score is suppressed.
                if (!ScoreThisStep(season))
                {
                    thisEvaluation = 0;
                }
                _score += thisEvaluation;

                // The log previously hard-coded a divisor of 3; it now follows StepNum like the
                // rest of the method, so it stays correct when StepNum is not 3.
                // NOTE(review): the log reads Sequence[season - 1] while Evaluate/ScoreThisStep
                // receive season - confirm that offset is intended.
                Debug.DLog($"{"Eating:",-16} {eatVal}" +
                           $"\n{"Poisonous:",-16} {Sequence[season - 1].IsPoisonous}" +
                           $"\n{"Score:",-16} {thisEvaluation.ToString("F4")}" +
                           $"\n{"Total Score:",-16} {_score.ToString("F4")} / {season - 1}" +
                           $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
            }
            else
            {
                Debug.DLog($"Task: Normal Step {season - 1}", true);
                observation = GetOutput(_step, -1);
            }

            Debug.DLogHeader("SEASON TASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, thisEvaluation);
            }

            _step++;
            return observation;
        }
Esempio n. 6
0
        /// <summary>
        /// Scores <c>action[0]</c> against the XOR of the previous sequence element's first two
        /// values (each treated as "set" when it equals exactly 1), then returns the observation
        /// for the current step.
        /// </summary>
        /// <param name="action">Controller output; only element 0 is read.</param>
        /// <returns>The observation for the current step.</returns>
        public override double[] PerformAction(double[] action)
        {
            double[] previousInput = _sequence[_step - 1];

            bool firstSet  = previousInput[0] == 1d;
            bool secondSet = previousInput[1] == 1d;

            // Exactly one of the two inputs set => expected output is 1.
            double expected = (firstSet != secondSet) ? 1d : 0d;

            double stepScore = Absolute(expected, action[0]);
            _score += stepScore;

            double[] observation = GetObservation(_step);

            _step++;

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, stepScore);
            }

            return observation;
        }
Esempio n. 7
0
        /// <summary>
        /// Advances the season task by one step. Every step is a reward / food step:
        /// <c>action[0]</c> is evaluated against index <c>_step - 1</c>, an observation is built
        /// from the resulting score, and the score is accumulated unless <c>ScoreThisStep</c>
        /// rejects the step.
        /// </summary>
        /// <param name="action">Controller output; only element 0 is read.</param>
        /// <returns>The observation for this step.</returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("SEASON TASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);

            Debug.DLog("Task: Reward / Food Step", true);
            double eatVal = action[0];

            // The observation comes straight from GetOutput; the array that used to be
            // pre-allocated here was always overwritten before use.
            double   thisScore   = Evaluate(eatVal, _step - 1);
            double[] observation = GetOutput(_step, thisScore);

            // NOTE(review): Evaluate and the log use _step - 1 while ScoreThisStep receives
            // _step - confirm the differing index is intended.
            if (!ScoreThisStep(_step)) // no scoring here
            {
                thisScore = 0;
            }
            _score += thisScore;

            Debug.DLog($"{"Eating:",-16} {eatVal}" +
                       $"\n{"Last Was Poisonous:",-16} {Sequence[_step - 1].IsPoisonous}" +
                       $"\n{"Score:",-16} {thisScore.ToString("F4")}" +
                       $"\n{"Total Score:",-16} {_score.ToString("F4")} / {_step}" +
                       $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);

            Debug.DLogHeader("SEASON TASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, thisScore);
            }

            _step++;
            return observation;
        }
        /// <summary>
        /// Moves the agent by the unit vector derived from <paramref name="action"/>, scores the
        /// move by its cosine similarity to the current target direction (shifted by +1; a zero
        /// move scores 0), and returns the environment output at the new position.
        /// </summary>
        /// <param name="action">Controller output, converted to a move via <c>GetUnitVector</c>.</param>
        /// <returns>The environment output computed for the agent's new position.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the computed score is NaN.</exception>
        public override double[] PerformAction(double[] action)
        {
            var move         = GetUnitVector(action);
            var targetVector = GetTargetVector(_currentTarget);
            var thisScore    = new Tuple <double, double>(0, 0).Equals(move) ? 0.0 : GetCosineSimilarity(move, targetVector) + 1; //Score between 0 and 2
            var newPosition  = AddVectors(_agentPosition, move);

            // NaN never compares equal to anything (including itself), so "== Double.NaN" is
            // always false; double.IsNaN is the only reliable check.
            if (double.IsNaN(thisScore))
            {
                throw new InvalidOperationException("Score evaluated to NaN.");
            }
            _currentScore += thisScore;
            _agentPosition = newPosition;
            MaybeChangeDirection();
            var result = CalculateEnvironmentOutput(_agentPosition);

            if (RecordTimeSteps)
            {
                _previousTimeStep = new EnvironmentTimeStep(action, result, thisScore);
            }
            _step++;
            return result;
        }